Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path SEPARATOR.

Prototype

String SEPARATOR

Document

The directory separator, a slash.

Usage
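
Before the project snippets below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the directory names are placeholders) showing how Path.SEPARATOR is typically used to join path segments:

import org.apache.hadoop.fs.Path;

public class PathSeparatorSketch {
    public static void main(String[] args) {
        // Path.SEPARATOR is the String "/", the directory separator used by Hadoop paths.
        String base = "/user/example"; // placeholder base directory
        String child = "staging";      // placeholder sub-directory

        // Joining segments by string concatenation...
        String joined = base + Path.SEPARATOR + child;
        System.out.println(joined);    // /user/example/staging

        // ...is equivalent here to building a Path from a parent and a child.
        Path asPath = new Path(base, child);
        System.out.println(asPath);    // /user/example/staging
    }
}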

From source file: fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License: LGPL

/**
 * Make a path relative with respect to a root path. absPath is always assumed
 * to descend from root; otherwise the returned path is null.
 */
static String makeRelative(final Path root, final Path absPath) {
    if (!absPath.isAbsolute()) {
        throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens
            .hasMoreTokens();) {
        if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
            return null;
        }
    }
    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens();) {
        sb.append(pathTokens.nextToken());
        if (pathTokens.hasMoreTokens()) {
            sb.append(Path.SEPARATOR);
        }
    }
    return sb.length() == 0 ? "." : sb.toString();
}
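
A hypothetical call of makeRelative with placeholder paths, illustrating the behaviour described in the comment above:

Path root = new Path("/data");                     // placeholder root
Path absPath = new Path("/data/year=2016/part-0"); // placeholder descendant of root

String relative = makeRelative(root, absPath);     // "year=2016/part-0"
// If absPath did not descend from root, makeRelative would return null;
// a non-absolute absPath would throw IllegalArgumentException.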

From source file: fr.jetoile.hadoopunit.component.OozieBootstrap.java

License: Apache License

public void createShareLib() {

    if (!oozieShareLibCreate) {
        LOGGER.info("OOZIE: Share Lib Create Disabled... skipping");
    } else {

        try {
            // Get and extract the oozie release
            String oozieExtractTempDir = extractOozieTarFileToTempDir(
                    new File(oozieShareLibPath + Path.SEPARATOR + oozieShareLibName));

            // Extract the sharelib tarball to a temp dir
            fullOozieShareLibTarFilePath = oozieExtractTempDir + Path.SEPARATOR + "oozie-"
                    + getOozieVersionFromOozieTarFileName() + Path.SEPARATOR + "oozie-sharelib-"
                    + getOozieVersionFromOozieTarFileName() + ".tar.gz";

            oozieShareLibExtractTempDir = extractOozieShareLibTarFileToTempDir(
                    new File(fullOozieShareLibTarFilePath));

            // Copy the sharelib into HDFS
            Path destPath = new Path(
                    oozieHdfsShareLibDir + Path.SEPARATOR + SHARE_LIB_PREFIX + getTimestampDirectory());
            LOGGER.info("OOZIE: Writing share lib contents to: {}", destPath);
            FileSystem hdfsFileSystem = null;
            try {
                hdfsFileSystem = ((HdfsBootstrap) HadoopBootstrap.INSTANCE.getService(Component.HDFS))
                        .getHdfsFileSystemHandle();
            } catch (Exception e) {
                LOGGER.error("unable to get hdfs client");
            }
            hdfsFileSystem.copyFromLocalFile(false, new Path(new File(oozieShareLibExtractTempDir).toURI()),
                    destPath);

            //                if (purgeLocalShareLibCache) {
            //                    FileUtils.deleteDirectory(new File(shareLibCacheDir));
            //                }

        } catch (IOException e) {
            LOGGER.error("unable to copy oozie sharelib into hdfs");
        }
    }
}

From source file: fr.jetoile.hadoopunit.HadoopUtils.java

License: Apache License

public static void setHadoopHome() {

    // Set hadoop.home.dir to point to the windows lib dir
    if (System.getProperty("os.name").startsWith("Windows")) {

        if (StringUtils.isEmpty(System.getenv("HADOOP_HOME"))) {

            try {
                configuration = new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE);
            } catch (ConfigurationException e) {
                LOG.error("unable to load {}", HadoopUnitConfig.DEFAULT_PROPS_FILE, e);
            }

            String hadoop_home = configuration.getString("HADOOP_HOME");

            LOG.info("Setting hadoop.home.dir: {}", hadoop_home);
            if (hadoop_home == null) {
                LOG.error("HADOOP_HOME should be set or informed into hadoop-unit-default.properties");
                System.exit(-1);
            } else {
                System.setProperty("HADOOP_HOME", hadoop_home);
            }

        } else {
            System.setProperty("HADOOP_HOME", System.getenv("HADOOP_HOME"));
        }

        String windowsLibDir = System.getenv("HADOOP_HOME");

        LOG.info("WINDOWS: Setting hadoop.home.dir: {}", windowsLibDir);
        System.setProperty("hadoop.home.dir", windowsLibDir);
        System.load(new File(windowsLibDir + Path.SEPARATOR + "bin" + Path.SEPARATOR + "hadoop.dll")
                .getAbsolutePath());
        System.load(new File(windowsLibDir + Path.SEPARATOR + "bin" + Path.SEPARATOR + "hdfs.dll")
                .getAbsolutePath());
    }
}

From source file: gobblin.cluster.GobblinClusterUtils.java

License: Apache License

/**
 * Get the application working directory {@link String}.
 *
 * @param applicationName the application name
 * @param applicationId the application ID in string form
 * @return the cluster application working directory {@link String}
 */
public static String getAppWorkDirPath(String applicationName, String applicationId) {
    return applicationName + Path.SEPARATOR + applicationId;
}

From source file: gobblin.cluster.GobblinHelixJobLauncher.java

License: Apache License

public GobblinHelixJobLauncher(Properties jobProps, final HelixManager helixManager, Path appWorkDir,
        List<? extends Tag<?>> metadataTags) throws Exception {
    super(jobProps, metadataTags);

    this.helixManager = helixManager;
    this.helixTaskDriver = new TaskDriver(this.helixManager);

    this.appWorkDir = appWorkDir;
    this.inputWorkUnitDir = new Path(appWorkDir, GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME);
    this.outputTaskStateDir = new Path(this.appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + Path.SEPARATOR
                    + this.jobContext.getJobId());

    this.helixQueueName = this.jobContext.getJobName();
    this.jobResourceName = TaskUtil.getNamespacedJobName(this.helixQueueName, this.jobContext.getJobId());

    this.jobContext.getJobState().setJobLauncherType(LauncherTypeEnum.CLUSTER);

    this.stateSerDeRunnerThreads = Integer
            .parseInt(jobProps.getProperty(ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
                    Integer.toString(ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS)));

    jobConfig = ConfigUtils.propertiesToConfig(jobProps);

    this.stateStores = new StateStores(jobConfig, appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME, appWorkDir,
            GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME);

    URI fsUri = URI.create(jobProps.getProperty(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    this.fs = FileSystem.get(fsUri, new Configuration());

    this.taskStateCollectorService = new TaskStateCollectorService(jobProps, this.jobContext.getJobState(),
            this.eventBus, this.stateStores.taskStateStore, outputTaskStateDir);
}

From source file: gobblin.cluster.GobblinHelixTaskTest.java

License: Apache License

private void prepareWorkUnit(WorkUnit workUnit) {
    workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, TestHelper.TEST_TASK_ID);
    workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY,
            Long.toString(Id.parse(TestHelper.TEST_TASK_ID).getSequence()));
    workUnit.setProp(ConfigurationKeys.SOURCE_CLASS_KEY, SimpleJsonSource.class.getName());
    workUnit.setProp(ConfigurationKeys.CONVERTER_CLASSES_KEY, SimpleJsonConverter.class.getName());
    workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, WriterOutputFormat.AVRO.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY,
            Destination.DestinationType.HDFS.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_STAGING_DIR,
            this.appWorkDir.toString() + Path.SEPARATOR + "staging");
    workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, this.taskOutputDir.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestHelper.WRITER_FILE_NAME);
    workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH, TestHelper.REL_WRITER_FILE_PATH);
    workUnit.setProp(ConfigurationKeys.WRITER_BUILDER_CLASS, AvroDataWriterBuilder.class.getName());
    workUnit.setProp(ConfigurationKeys.SOURCE_SCHEMA, TestHelper.SOURCE_SCHEMA);
}

From source file: gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java

License: Apache License

/**
 * Populate the avro to orc conversion queries. The Queries will be added to {@link QueryBasedHiveConversionEntity#getQueries()}
 */
@Override
public Iterable<QueryBasedHiveConversionEntity> convertRecord(Schema outputAvroSchema,
        QueryBasedHiveConversionEntity conversionEntity, WorkUnitState workUnit)
        throws DataConversionException {

    Preconditions.checkNotNull(outputAvroSchema, "Avro schema must not be null");
    Preconditions.checkNotNull(conversionEntity, "Conversion entity must not be null");
    Preconditions.checkNotNull(workUnit, "Workunit state must not be null");
    Preconditions.checkNotNull(conversionEntity.getHiveTable(),
            "Hive table within conversion entity must not be null");

    EventWorkunitUtils.setBeginDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    this.hiveDataset = conversionEntity.getConvertibleHiveDataset();

    if (!hasConversionConfig()) {
        return new SingleRecordIterable<>(conversionEntity);
    }

    // Avro table name and location
    String avroTableName = conversionEntity.getHiveTable().getTableName();

    // ORC table name and location
    String orcTableName = getConversionConfig().getDestinationTableName();
    String orcStagingTableName = getOrcStagingTableName(getConversionConfig().getDestinationStagingTableName());
    String orcTableDatabase = getConversionConfig().getDestinationDbName();
    String orcDataLocation = getOrcDataLocation();
    String orcStagingDataLocation = getOrcStagingDataLocation(orcStagingTableName);
    boolean isEvolutionEnabled = getConversionConfig().isEvolutionEnabled();
    Pair<Optional<Table>, Optional<List<Partition>>> destinationMeta = getDestinationTableMeta(orcTableDatabase,
            orcTableName, workUnit);
    Optional<Table> destinationTableMeta = destinationMeta.getLeft();

    // Optional
    // wrapperViewName          : If specified, a view named 'wrapperViewName' is created (if it does not
    //                            already exist) over the destination table
    // isUpdateViewAlwaysEnabled: If false, 'wrapperViewName' is only updated when the schema evolves; if true,
    //                            'wrapperViewName' is updated on every publish
    Optional<String> wrapperViewName = getConversionConfig().getDestinationViewName();
    boolean shouldUpdateView = getConversionConfig().isUpdateViewAlwaysEnabled();
    Optional<List<String>> clusterBy = getConversionConfig().getClusterBy().isEmpty()
            ? Optional.<List<String>>absent()
            : Optional.of(getConversionConfig().getClusterBy());
    Optional<Integer> numBuckets = getConversionConfig().getNumBuckets();
    Optional<Integer> rowLimit = getConversionConfig().getRowLimit();
    Properties tableProperties = getConversionConfig().getDestinationTableProperties();

    // The partition dir hint helps create different directories for hourly and daily partitions with the same
    // .. timestamp, such as: daily_2016-01-01-00 and hourly_2016-01-01-00
    // This prevents existing hourly data from being deleted at the time of roll-up, so Hive queries in flight
    // .. do not fail
    List<String> sourceDataPathIdentifier = getConversionConfig().getSourceDataPathIdentifier();

    // Populate optional partition info
    Map<String, String> partitionsDDLInfo = Maps.newHashMap();
    Map<String, String> partitionsDMLInfo = Maps.newHashMap();
    populatePartitionInfo(conversionEntity, partitionsDDLInfo, partitionsDMLInfo);

    /*
     * Create ORC data location with the same permissions as Avro data
     *
     * Note that Hive can also automatically create the non-existing directories, but it does not
     * seem to create them with the desired permissions.
     * According to the Hive docs, permissions for newly created directories/files can be controlled
     * using a umask, e.g.:
     *
     * SET hive.warehouse.subdir.inherit.perms=false;
     * SET fs.permissions.umask-mode=022;
     *
     * Upon testing, this did not work.
     */
    try {
        FileStatus sourceDataFileStatus = this.fs
                .getFileStatus(conversionEntity.getHiveTable().getDataLocation());
        FsPermission sourceDataPermission = sourceDataFileStatus.getPermission();
        if (!this.fs.mkdirs(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission)) {
            throw new RuntimeException(String.format("Failed to create path %s with permissions %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission));
        } else {
            this.fs.setPermission(new Path(getConversionConfig().getDestinationDataPath()),
                    sourceDataPermission);
            // Set the same group as source
            if (!workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP,
                    DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) {
                this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null,
                        sourceDataFileStatus.getGroup());
            }
            log.info(String.format("Created %s with permissions %s and group %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission,
                    sourceDataFileStatus.getGroup()));
        }
    } catch (IOException e) {
        Throwables.propagate(e);
    }

    // Set hive runtime properties from conversion config
    for (Map.Entry<Object, Object> entry : getConversionConfig().getHiveRuntimeProperties().entrySet()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", entry.getKey(), entry.getValue()));
    }
    // Set hive runtime properties for tracking
    conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_DATASET_URN_KEY,
            conversionEntity.getHiveTable().getCompleteName()));
    if (conversionEntity.getHivePartition().isPresent()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_PARTITION_NAME_KEY,
                conversionEntity.getHivePartition().get().getCompleteName()));
    }
    conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_WORKUNIT_CREATE_TIME_KEY,
            workUnit.getWorkunit().getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY)));

    // Create DDL statement for table
    Map<String, String> hiveColumns = new LinkedHashMap<>();
    String createStagingTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
            orcStagingTableName, orcStagingDataLocation, Optional.of(orcTableDatabase),
            Optional.of(partitionsDDLInfo), clusterBy,
            Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(), numBuckets,
            Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(), tableProperties,
            isEvolutionEnabled, destinationTableMeta, hiveColumns);
    conversionEntity.getQueries().add(createStagingTableDDL);
    log.debug("Create staging table DDL: " + createStagingTableDDL);

    // Create DDL statement for partition
    String orcStagingDataPartitionDirName = getOrcStagingDataPartitionDirName(conversionEntity,
            sourceDataPathIdentifier);
    String orcStagingDataPartitionLocation = orcStagingDataLocation + Path.SEPARATOR
            + orcStagingDataPartitionDirName;
    if (partitionsDMLInfo.size() > 0) {
        List<String> createStagingPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                orcTableDatabase, orcStagingTableName, orcStagingDataPartitionLocation, partitionsDMLInfo);

        conversionEntity.getQueries().addAll(createStagingPartitionDDL);
        log.debug("Create staging partition DDL: " + createStagingPartitionDDL);
    }

    // Create DML statement
    String insertInORCStagingTableDML = HiveAvroORCQueryGenerator.generateTableMappingDML(
            conversionEntity.getHiveTable().getAvroSchema(), outputAvroSchema, avroTableName,
            orcStagingTableName, Optional.of(conversionEntity.getHiveTable().getDbName()),
            Optional.of(orcTableDatabase), Optional.of(partitionsDMLInfo), Optional.<Boolean>absent(),
            Optional.<Boolean>absent(), isEvolutionEnabled, destinationTableMeta, rowLimit);
    conversionEntity.getQueries().add(insertInORCStagingTableDML);
    log.debug("Conversion staging DML: " + insertInORCStagingTableDML);

    // TODO: Split this method into two (conversion and publish)
    // Addition to WUS for Staging publish:
    // A. Evolution turned on:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //      2.1 Evolve table (alter table)
    //      2.2 If snapshot table:
    //          2.2.1 Delete data in final table directory
    //          2.2.2 Move data from staging to final table directory
    //          2.2.3 Drop this staging table and delete directories
    //      2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //          2.3.1 Drop if exists partition in final table
    //          2.3.2 Move partition directory
    //          2.3.3 Create partition with location
    //          2.3.4 Drop this staging table and delete directories
    // B. Evolution turned off:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //      2.1 Do not evolve table
    //      2.2 If snapshot table:
    //          2.2.1 Delete data in final table directory
    //          2.2.2 Move data from staging to final table directory
    //          2.2.3 Drop this staging table and delete directories
    //      2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //          2.3.1 Drop if exists partition in final table
    //          2.3.2 Move partition directory
    //          2.3.3 Create partition with location
    //          2.3.4 Drop this staging table and delete directories
    // Note: The queries below also serve as a compatibility check module before conversion; an incompatible
    //      .. schema throws a RuntimeException, hence preventing further execution
    QueryBasedHivePublishEntity publishEntity = new QueryBasedHivePublishEntity();
    List<String> publishQueries = publishEntity.getPublishQueries();
    Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
    List<String> cleanupQueries = publishEntity.getCleanupQueries();
    List<String> cleanupDirectories = publishEntity.getCleanupDirectories();

    // Step:
    // A.1, B.1: If table does not exist, simply create it
    if (!destinationTableMeta.isPresent()) {
        String createTargetTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
                orcTableName, orcDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo),
                clusterBy, Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(),
                numBuckets, Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(),
                tableProperties, isEvolutionEnabled, destinationTableMeta, new HashMap<String, String>());
        publishQueries.add(createTargetTableDDL);
        log.debug("Create final table DDL: " + createTargetTableDDL);
    }

    // Step:
    // A.2.1: If table pre-exists (destinationTableMeta would be present), evolve table
    // B.2.1: No-op
    List<String> evolutionDDLs = HiveAvroORCQueryGenerator.generateEvolutionDDL(orcStagingTableName,
            orcTableName, Optional.of(orcTableDatabase), Optional.of(orcTableDatabase), outputAvroSchema,
            isEvolutionEnabled, hiveColumns, destinationTableMeta);
    log.debug("Evolve final table DDLs: " + evolutionDDLs);
    EventWorkunitUtils.setEvolutionMetadata(workUnit, evolutionDDLs);

    // View (if present) must be updated if evolution happens
    shouldUpdateView |= evolutionDDLs.size() > 0;

    publishQueries.addAll(evolutionDDLs);

    if (partitionsDDLInfo.size() == 0) {
        // Step:
        // A.2.2, B.2.2: Snapshot table

        // Step:
        // A.2.2.1, B.2.2.1: Delete data in final table directory
        // A.2.2.2, B.2.2.2: Move data from staging to final table directory
        log.info("Snapshot directory to move: " + orcStagingDataLocation + " to: " + orcDataLocation);
        publishDirectories.put(orcStagingDataLocation, orcDataLocation);

        // Step:
        // A.2.2.3, B.2.2.3: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);

        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);

    } else {
        // Step:
        // A.2.3, B.2.3: If partitioned table, move partitions from staging to final table; for all partitions:

        // Step:
        // A.2.3.1, B.2.3.1: Drop if exists partition in final table
        List<String> dropPartitionsDDL = HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase,
                orcTableName, partitionsDMLInfo);
        log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL);
        publishQueries.addAll(dropPartitionsDDL);

        // Step:
        // A.2.3.2, B.2.3.2: Move partition directory
        // Move: orcStagingDataPartitionLocation to: orcFinalDataPartitionLocation
        String orcFinalDataPartitionLocation = orcDataLocation + Path.SEPARATOR
                + orcStagingDataPartitionDirName;
        log.info("Partition directory to move: " + orcStagingDataPartitionLocation + " to: "
                + orcFinalDataPartitionLocation);
        publishDirectories.put(orcStagingDataPartitionLocation, orcFinalDataPartitionLocation);

        // Step:
        // A.2.3.3, B.2.3.3: Create partition with location (and update storage format if not in ORC already)
        String orcDataPartitionLocation = orcDataLocation + Path.SEPARATOR + orcStagingDataPartitionDirName;
        if (workUnit.getPropAsBoolean(HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY,
                DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY)) {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcDataPartitionLocation, partitionsDMLInfo,
                    Optional.<String>absent());

            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);

            // Updating the storage format non-transactionally is a stop-gap measure until Hive supports transactionally
            // .. updating the storage format in the ADD PARTITION command (today it only supports specifying a location)
            List<String> updatePartitionStorageFormatDDL = HiveAvroORCQueryGenerator
                    .generateAlterTableOrPartitionStorageFormatDDL(orcTableDatabase, orcTableName,
                            Optional.of(partitionsDMLInfo), ORC_FORMAT);
            log.debug("Update final partition storage format to ORC (if not already in ORC)");
            publishQueries.addAll(updatePartitionStorageFormatDDL);
        } else {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcDataPartitionLocation, partitionsDMLInfo,
                    Optional.fromNullable(ORC_FORMAT));

            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);
        }

        // Step:
        // A.2.3.4, B.2.3.4: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);

        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);
    }

    /*
     * Drop the replaced partitions if any. This is required in case the partition being converted is derived from
     * several other partitions. E.g. a daily partition is a replacement of the hourly partitions of the same day. When the
     * daily partition is converted to ORC, all of its hourly ORC partitions need to be dropped.
     */
    publishQueries.addAll(HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase, orcTableName,
            getDropPartitionsDDLInfo(conversionEntity)));

    /*
     * Create or update a view over the ORC table if specified in the config (i.e. the wrapper view name is present in the config)
     */
    if (wrapperViewName.isPresent()) {
        String viewName = wrapperViewName.get();
        List<String> createOrUpdateViewDDLs = HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL(
                orcTableDatabase, orcTableName, orcTableDatabase, viewName, shouldUpdateView);
        log.debug("Create or update View DDLs: " + createOrUpdateViewDDLs);
        publishQueries.addAll(createOrUpdateViewDDLs);

    }

    HiveAvroORCQueryGenerator.serializePublishCommands(workUnit, publishEntity);
    log.debug("Publish partition entity: " + publishEntity);

    log.debug("Conversion Query " + conversionEntity.getQueries());

    EventWorkunitUtils.setEndDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    return new SingleRecordIterable<>(conversionEntity);
}

From source file: gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java

License: Apache License

/***
 * Get the ORC final table location of format: <ORC final table location>/final
 * @return ORC final table location.
 */
private String getOrcDataLocation() {
    String orcDataLocation = getConversionConfig().getDestinationDataPath();

    return orcDataLocation + Path.SEPARATOR + PUBLISHED_TABLE_SUBDIRECTORY;
}

From source file: gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java

License: Apache License

/***
 * Get the ORC staging table location of format: <ORC final table location>/<ORC staging table name>
 * @param orcStagingTableName ORC staging table name.
 * @return ORC staging table location.
 */
private String getOrcStagingDataLocation(String orcStagingTableName) {
    String orcDataLocation = getConversionConfig().getDestinationDataPath();

    return orcDataLocation + Path.SEPARATOR + orcStagingTableName;
}

From source file: gobblin.data.management.conversion.hive.converter.HiveAvroToOrcConverter.java

License: Open Source License

private String getOrcDataLocation(WorkUnitState workUnit, String avroDataLocation, String orcTableName,
        Optional<String> postfix) {
    String orcDataLocation;

    // By default the ORC table is created in a new directory next to where the Avro data resides, with an _orc
    // .. postfix, but this can be overridden by specifying this property
    String orcTableAlternateLocation = workUnit.getJobState().getProp(ORC_TABLE_ALTERNATE_LOCATION);
    if (StringUtils.isNotBlank(orcTableAlternateLocation)) {
        orcDataLocation = new Path(orcTableAlternateLocation, orcTableName).toString();
    } else {
        orcDataLocation = StringUtils.removeEnd(avroDataLocation, Path.SEPARATOR) + "_orc";
    }

    if (postfix.isPresent()) {
        orcDataLocation += postfix.get();
    }

    // Each job execution further writes to a sub-directory within the ORC data directory to support the staging
    // .. use case, i.e. for atomic swap
    orcDataLocation += Path.SEPARATOR + workUnit.getJobState().getId();

    return orcDataLocation;
}