List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR
Field: String SEPARATOR, the path separator string "/".
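Path.SEPARATOR is the separator string "/" that Hadoop places between path components (Path.SEPARATOR_CHAR is the corresponding char). The examples below mostly concatenate it between directory names and then wrap the result in a new Path. A minimal standalone sketch of that pattern, using a hypothetical work directory and job id (the names echo the Gobblin example further down but are not taken from it):

import org.apache.hadoop.fs.Path;

public class PathSeparatorSketch {
    public static void main(String[] args) {
        // Hypothetical work directory and job id, chosen only for illustration.
        String jobId = "job_123";
        String relative = "_jobstate" + Path.SEPARATOR + jobId + Path.SEPARATOR + jobId + ".job.state";

        // Path(parent, child) joins and normalizes the separators in the final path.
        Path jobStateFile = new Path("/apps/work", relative);
        System.out.println(jobStateFile); // /apps/work/_jobstate/job_123/job_123.job.state
    }
}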
From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java
License:Apache License
private String getSnapshotDir(String dirName) {
    // Trim any trailing separator so the join below never produces a double "//".
    dirName = StringUtils.removeEnd(dirName, Path.SEPARATOR);
    return dirName + Path.SEPARATOR + HdfsSnapshotUtil.SNAPSHOT_DIR_PREFIX + Path.SEPARATOR;
}
From source file:org.apache.falcon.snapshots.retention.HdfsSnapshotEvictor.java
License:Apache License
protected static void evictSnapshots(DistributedFileSystem fs, String dirName, String ageLimit, int numSnapshots)
        throws FalconException {
    try {
        LOG.info("Started evicting snapshots on dir {}{} using policy {}, agelimit {}, numSnapshot {}",
                fs.getUri(), dirName, ageLimit, numSnapshots);

        long evictionTime = System.currentTimeMillis() - EvictionHelper.evalExpressionToMilliSeconds(ageLimit);

        dirName = StringUtils.removeEnd(dirName, Path.SEPARATOR);
        String snapshotDir = dirName + Path.SEPARATOR + HdfsSnapshotUtil.SNAPSHOT_DIR_PREFIX + Path.SEPARATOR;
        FileStatus[] snapshots = fs.listStatus(new Path(snapshotDir));
        if (snapshots.length <= numSnapshots) {
            // no eviction needed
            return;
        }

        // Sort by last modified time, ascending order.
        Arrays.sort(snapshots, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus f1, FileStatus f2) {
                return Long.compare(f1.getModificationTime(), f2.getModificationTime());
            }
        });

        for (int i = 0; i < (snapshots.length - numSnapshots); i++) {
            // delete if older than ageLimit while retaining numSnapshots
            if (snapshots[i].getModificationTime() < evictionTime) {
                fs.deleteSnapshot(new Path(dirName), snapshots[i].getPath().getName());
            }
        }
    } catch (ELException ele) {
        LOG.warn("Unable to parse retention age limit {} {}", ageLimit, ele.getMessage());
        throw new FalconException("Unable to parse retention age limit " + ageLimit, ele);
    } catch (IOException ioe) {
        LOG.warn("Unable to evict snapshots from dir {} {}", dirName, ioe);
        throw new FalconException("Unable to evict snapshots from dir " + dirName, ioe);
    }
}
From source file:org.apache.flume.sink.hive.TestUtil.java
License:Apache License
public static void createDbAndTable(Driver driver, String databaseName, String tableName, List<String> partVals,
        String[] colNames, String[] colTypes, String[] partNames, String dbLocation) throws Exception {
    String dbUri = "raw://" + dbLocation;
    String tableLoc = dbUri + Path.SEPARATOR + tableName;

    runDDL(driver, "create database IF NOT EXISTS " + databaseName + " location '" + dbUri + "'");
    runDDL(driver, "use " + databaseName);

    String crtTbl = "create table " + tableName + " ( " + getTableColumnsStr(colNames, colTypes) + " )"
            + getPartitionStmtStr(partNames) + " clustered by ( " + colNames[0] + " )" + " into 10 buckets "
            + " stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('transactional'='true')";
    runDDL(driver, crtTbl);
    System.out.println("crtTbl = " + crtTbl);

    if (partNames != null && partNames.length != 0) {
        String addPart = "alter table " + tableName + " add partition ( "
                + getTablePartsStr2(partNames, partVals) + " )";
        runDDL(driver, addPart);
    }
}
From source file:org.apache.gobblin.cluster.GobblinClusterUtils.java
License:Apache License
/**
 * Generate the path to the job.state file
 * @param usingStateStore is a state store being used to store the job.state content
 * @param appWorkPath work directory
 * @param jobId job id
 * @return a {@link Path} referring to the job.state
 */
public static Path getJobStateFilePath(boolean usingStateStore, Path appWorkPath, String jobId) {
    final Path jobStateFilePath;

    // the state store uses a path of the form workdir/_jobstate/job_id/job_id.job.state while the old method
    // stores the file in the app work dir.
    if (usingStateStore) {
        jobStateFilePath = new Path(appWorkPath, GobblinClusterConfigurationKeys.JOB_STATE_DIR_NAME
                + Path.SEPARATOR + jobId + Path.SEPARATOR + jobId + "." + AbstractJobLauncher.JOB_STATE_FILE_NAME);
    } else {
        jobStateFilePath = new Path(appWorkPath, jobId + "." + AbstractJobLauncher.JOB_STATE_FILE_NAME);
    }

    log.info("job state file path: " + jobStateFilePath);

    return jobStateFilePath;
}
From source file:org.apache.gobblin.cluster.GobblinHelixJobLauncher.java
License:Apache License
public GobblinHelixJobLauncher(Properties jobProps, final HelixManager helixManager, Path appWorkDir,
        List<? extends Tag<?>> metadataTags, ConcurrentHashMap<String, Boolean> runningMap,
        Optional<GobblinHelixMetrics> helixMetrics) throws Exception {
    super(jobProps, addAdditionalMetadataTags(jobProps, metadataTags));

    LOGGER.debug("GobblinHelixJobLauncher: jobProps {}, appWorkDir {}", jobProps, appWorkDir);

    this.helixManager = helixManager;
    this.helixTaskDriver = new TaskDriver(this.helixManager);
    this.runningMap = runningMap;
    this.appWorkDir = appWorkDir;
    this.inputWorkUnitDir = new Path(appWorkDir, GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME);
    this.outputTaskStateDir = new Path(this.appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + Path.SEPARATOR + this.jobContext.getJobId());

    this.helixWorkFlowName = this.jobContext.getJobId();
    this.jobContext.getJobState().setJobLauncherType(LauncherTypeEnum.CLUSTER);

    this.stateSerDeRunnerThreads = Integer.parseInt(jobProps.getProperty(ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
            Integer.toString(ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS)));

    jobConfig = ConfigUtils.propertiesToConfig(jobProps);

    this.workFlowExpiryTimeSeconds = ConfigUtils.getLong(jobConfig,
            GobblinClusterConfigurationKeys.HELIX_WORKFLOW_EXPIRY_TIME_SECONDS,
            GobblinClusterConfigurationKeys.DEFAULT_HELIX_WORKFLOW_EXPIRY_TIME_SECONDS);

    this.helixJobStopTimeoutSeconds = ConfigUtils.getLong(jobConfig,
            GobblinClusterConfigurationKeys.HELIX_JOB_STOP_TIMEOUT_SECONDS,
            GobblinClusterConfigurationKeys.DEFAULT_HELIX_JOB_STOP_TIMEOUT_SECONDS);

    Config stateStoreJobConfig = ConfigUtils.propertiesToConfig(jobProps).withValue(
            ConfigurationKeys.STATE_STORE_FS_URI_KEY,
            ConfigValueFactory.fromAnyRef(new URI(appWorkDir.toUri().getScheme(), null, appWorkDir.toUri().getHost(),
                    appWorkDir.toUri().getPort(), null, null, null).toString()));

    this.stateStores = new StateStores(stateStoreJobConfig, appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME, appWorkDir,
            GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME, appWorkDir,
            GobblinClusterConfigurationKeys.JOB_STATE_DIR_NAME);

    URI fsUri = URI.create(jobProps.getProperty(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    this.fs = FileSystem.get(fsUri, new Configuration());

    this.taskStateCollectorService = new TaskStateCollectorService(jobProps, this.jobContext.getJobState(),
            this.eventBus, this.stateStores.getTaskStateStore(), this.outputTaskStateDir);

    this.helixMetrics = helixMetrics;
    startCancellationExecutor();
}
From source file:org.apache.gobblin.compaction.mapreduce.CompactionJobConfigurator.java
License:Apache License
private static boolean isFailedPath(Path path, List<TaskCompletionEvent> failedEvents) {
    // A path is considered failed if it is still under a _temporary directory, or if one of its
    // separator-delimited segments equals the attempt id of a failed task completion event.
    return path.toString().contains("_temporary") || failedEvents.stream()
            .anyMatch(event -> path.toString()
                    .contains(Path.SEPARATOR + event.getTaskAttemptId().toString() + Path.SEPARATOR));
}
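Wrapping the attempt id with Path.SEPARATOR on both sides restricts the match to a whole path segment, so a failed attempt id cannot accidentally match a path that merely starts with the same characters. A small standalone sketch (hypothetical path and attempt ids, not part of the Gobblin code) showing the difference:

import org.apache.hadoop.fs.Path;

public class SeparatorDelimitedMatch {
    public static void main(String[] args) {
        // Hypothetical task output path and failed attempt id.
        String path = "/tmp/job/_temporary/attempt_10/part-00000";
        String failedAttempt = "attempt_1";

        // Naive substring check: prints true, even though attempt_1 and attempt_10 are different attempts.
        System.out.println(path.contains(failedAttempt));

        // Separator-delimited check, as in isFailedPath above: prints false, since "/attempt_1/" is not a segment.
        System.out.println(path.contains(Path.SEPARATOR + failedAttempt + Path.SEPARATOR));
    }
}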
From source file:org.apache.gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java
License:Apache License
/**
 * Populate the avro to orc conversion queries. The queries will be added to
 * {@link QueryBasedHiveConversionEntity#getQueries()}
 */
@Override
public Iterable<QueryBasedHiveConversionEntity> convertRecord(Schema outputAvroSchema,
        QueryBasedHiveConversionEntity conversionEntity, WorkUnitState workUnit) throws DataConversionException {
    Preconditions.checkNotNull(outputAvroSchema, "Avro schema must not be null");
    Preconditions.checkNotNull(conversionEntity, "Conversion entity must not be null");
    Preconditions.checkNotNull(workUnit, "Workunit state must not be null");
    Preconditions.checkNotNull(conversionEntity.getTable(), "Hive table within conversion entity must not be null");

    EventWorkunitUtils.setBeginDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    this.hiveDataset = conversionEntity.getConvertibleHiveDataset();

    if (!hasConversionConfig()) {
        return new SingleRecordIterable<>(conversionEntity);
    }

    // Avro table name and location
    String avroTableName = conversionEntity.getTable().getTableName();

    // ORC table name and location
    String orcTableName = getConversionConfig().getDestinationTableName();
    String orcStagingTableName = getOrcStagingTableName(getConversionConfig().getDestinationStagingTableName());
    String orcTableDatabase = getConversionConfig().getDestinationDbName();
    String orcDataLocation = getOrcDataLocation();
    String orcStagingDataLocation = getOrcStagingDataLocation(orcStagingTableName);
    boolean isEvolutionEnabled = getConversionConfig().isEvolutionEnabled();
    Pair<Optional<Table>, Optional<List<Partition>>> destinationMeta = HiveConverterUtils
            .getDestinationTableMeta(orcTableDatabase, orcTableName, workUnit.getProperties());
    Optional<Table> destinationTableMeta = destinationMeta.getLeft();

    // Optional
    // View registration blacklist / whitelist
    Optional<WhitelistBlacklist> optionalViewRegistrationWhiteBlacklist = getViewWhiteBackListFromWorkUnit(workUnit);

    // wrapperViewName : If specified, a view with 'wrapperViewName' is created, if it does not already exist,
    // over the destination table
    // isUpdateViewAlwaysEnabled: If false 'wrapperViewName' is only updated when schema evolves; if true
    // 'wrapperViewName' is always updated (every time publish happens)
    Optional<String> wrapperViewName = Optional.<String>absent();
    if (optionalViewRegistrationWhiteBlacklist.isPresent()) {
        wrapperViewName = optionalViewRegistrationWhiteBlacklist.get().acceptTable(orcTableDatabase, orcTableName)
                ? getConversionConfig().getDestinationViewName()
                : wrapperViewName;
    } else {
        wrapperViewName = getConversionConfig().getDestinationViewName();
    }
    boolean shouldUpdateView = getConversionConfig().isUpdateViewAlwaysEnabled();

    // Other properties
    Optional<List<String>> clusterBy = getConversionConfig().getClusterBy().isEmpty()
            ? Optional.<List<String>>absent()
            : Optional.of(getConversionConfig().getClusterBy());
    Optional<Integer> numBuckets = getConversionConfig().getNumBuckets();
    Optional<Integer> rowLimit = getConversionConfig().getRowLimit();
    Properties tableProperties = getConversionConfig().getDestinationTableProperties();

    // Partition dir hint helps create different directories for hourly and daily partitions with the same timestamp,
    // such as daily_2016-01-01-00 and hourly_2016-01-01-00.
    // This prevents existing hourly data from being deleted at the time of roll up, so Hive queries in flight
    // do not fail
    List<String> sourceDataPathIdentifier = getConversionConfig().getSourceDataPathIdentifier();

    // Populate optional partition info
    Map<String, String> partitionsDDLInfo = Maps.newHashMap();
    Map<String, String> partitionsDMLInfo = Maps.newHashMap();
    HiveConverterUtils.populatePartitionInfo(conversionEntity, partitionsDDLInfo, partitionsDMLInfo);

    /*
     * Create ORC data location with the same permissions as Avro data
     *
     * Note that hive can also automatically create the non-existing directories but it does not
     * seem to create it with the desired permissions.
     * According to hive docs permissions for newly created directories/files can be controlled using uMask like,
     *
     * SET hive.warehouse.subdir.inherit.perms=false;
     * SET fs.permissions.umask-mode=022;
     * Upon testing, this did not work
     */
    try {
        FileStatus sourceDataFileStatus = this.fs.getFileStatus(conversionEntity.getTable().getDataLocation());
        FsPermission sourceDataPermission = sourceDataFileStatus.getPermission();
        if (!this.fs.mkdirs(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission)) {
            throw new RuntimeException(String.format("Failed to create path %s with permissions %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission));
        } else {
            this.fs.setPermission(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission);
            // Explicitly set group name for destination location if specified, otherwise preserve source group name
            String destinationGroupName;
            if (workUnit.contains(HIVE_DATASET_DESTINATION_GROUP_NAME)) {
                destinationGroupName = workUnit.getProp(HIVE_DATASET_DESTINATION_GROUP_NAME);
            } else {
                destinationGroupName = sourceDataFileStatus.getGroup();
            }
            if (!workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP,
                    DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) {
                this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null, destinationGroupName);
            }
            log.info(String.format("Created %s with permissions %s and group %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission,
                    sourceDataFileStatus.getGroup()));

            // Explicitly set group name for staging directory if specified
            if (workUnit.contains(HIVE_DATASET_STAGING_GROUP_NAME)) {
                String stagingGroupName = workUnit.getProp(HIVE_DATASET_STAGING_GROUP_NAME);
                log.info("Setting staging directory group name as " + stagingGroupName);
                this.fs.mkdirs(new Path(getOrcStagingDataLocation(orcStagingTableName)));
                this.fs.setOwner(new Path(getOrcStagingDataLocation(orcStagingTableName)), null, stagingGroupName);

                // Staging directory will be renamed to getOrcDataLocation() and hence its group name should match
                // with the group name of the staging directory
                this.fs.mkdirs(new Path(getOrcDataLocation()));
                this.fs.setOwner(new Path(getOrcDataLocation()), null, stagingGroupName);
            }
        }
    } catch (IOException e) {
        Throwables.propagate(e);
    }

    // Set hive runtime properties from conversion config
    for (Map.Entry<Object, Object> entry : getConversionConfig().getHiveRuntimeProperties().entrySet()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", entry.getKey(), entry.getValue()));
    }
    // Set hive runtime properties for tracking
    conversionEntity.getQueries().add(
            String.format("SET %s=%s", GOBBLIN_DATASET_URN_KEY, conversionEntity.getTable().getCompleteName()));
    if (conversionEntity.getPartition().isPresent()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_PARTITION_NAME_KEY,
                conversionEntity.getPartition().get().getCompleteName()));
    }
    conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_WORKUNIT_CREATE_TIME_KEY,
            workUnit.getWorkunit().getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY)));

    // Create DDL statement for table
    Map<String, String> hiveColumns = new LinkedHashMap<>();
    String createStagingTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
            orcStagingTableName, orcStagingDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo),
            clusterBy, Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(), numBuckets,
            Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(), tableProperties,
            isEvolutionEnabled, destinationTableMeta, hiveColumns);
    conversionEntity.getQueries().add(createStagingTableDDL);
    log.debug("Create staging table DDL: " + createStagingTableDDL);

    // Create DDL statement for partition
    String orcStagingDataPartitionDirName = HiveConverterUtils.getStagingDataPartitionDirName(conversionEntity,
            sourceDataPathIdentifier);
    String orcStagingDataPartitionLocation = orcStagingDataLocation + Path.SEPARATOR + orcStagingDataPartitionDirName;
    if (partitionsDMLInfo.size() > 0) {
        List<String> createStagingPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                orcTableDatabase, orcStagingTableName, orcStagingDataPartitionLocation, partitionsDMLInfo);
        conversionEntity.getQueries().addAll(createStagingPartitionDDL);
        log.debug("Create staging partition DDL: " + createStagingPartitionDDL);
    }

    // Create DML statement
    String insertInORCStagingTableDML = HiveAvroORCQueryGenerator.generateTableMappingDML(
            conversionEntity.getHiveTable().getAvroSchema(), outputAvroSchema, avroTableName, orcStagingTableName,
            Optional.of(conversionEntity.getTable().getDbName()), Optional.of(orcTableDatabase),
            Optional.of(partitionsDMLInfo), Optional.<Boolean>absent(), Optional.<Boolean>absent(),
            isEvolutionEnabled, destinationTableMeta, rowLimit);
    conversionEntity.getQueries().add(insertInORCStagingTableDML);
    log.debug("Conversion staging DML: " + insertInORCStagingTableDML);

    // TODO: Split this method into two (conversion and publish)
    // Addition to WUS for Staging publish:
    // A. Evolution turned on:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //       2.1 Evolve table (alter table)
    //       2.2 If snapshot table:
    //           2.2.1 Delete data in final table directory
    //           2.2.2 Move data from staging to final table directory
    //           2.2.3 Drop this staging table and delete directories
    //       2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //           2.3.1 Drop if exists partition in final table
    //           2.3.2 Move partition directory
    //           2.3.3 Create partition with location
    //           2.3.4 Drop this staging table and delete directories
    // B. Evolution turned off:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //       2.1 Do not evolve table
    //       2.2 If snapshot table:
    //           2.2.1 Delete data in final table directory
    //           2.2.2 Move data from staging to final table directory
    //           2.2.3 Drop this staging table and delete directories
    //       2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //           2.3.1 Drop if exists partition in final table
    //           2.3.2 Move partition directory
    //           2.3.3 Create partition with location
    //           2.3.4 Drop this staging table and delete directories
    // Note: The queries below also serve as a compatibility check module before conversion; an incompatible
    // schema throws a Runtime exception, hence preventing further execution
    QueryBasedHivePublishEntity publishEntity = new QueryBasedHivePublishEntity();
    List<String> publishQueries = publishEntity.getPublishQueries();
    Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
    List<String> cleanupQueries = publishEntity.getCleanupQueries();
    List<String> cleanupDirectories = publishEntity.getCleanupDirectories();

    // Step:
    // A.1, B.1: If table does not exist, simply create it
    if (!destinationTableMeta.isPresent()) {
        String createTargetTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
                orcTableName, orcDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo),
                clusterBy, Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(), numBuckets,
                Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(), tableProperties,
                isEvolutionEnabled, destinationTableMeta, new HashMap<String, String>());
        publishQueries.add(createTargetTableDDL);
        log.debug("Create final table DDL: " + createTargetTableDDL);
    }

    // Step:
    // A.2.1: If table pre-exists (destinationTableMeta would be present), evolve table
    // B.2.1: No-op
    List<String> evolutionDDLs = HiveAvroORCQueryGenerator.generateEvolutionDDL(orcStagingTableName, orcTableName,
            Optional.of(orcTableDatabase), Optional.of(orcTableDatabase), outputAvroSchema, isEvolutionEnabled,
            hiveColumns, destinationTableMeta);
    log.debug("Evolve final table DDLs: " + evolutionDDLs);
    EventWorkunitUtils.setEvolutionMetadata(workUnit, evolutionDDLs);

    // View (if present) must be updated if evolution happens
    shouldUpdateView |= evolutionDDLs.size() > 0;

    publishQueries.addAll(evolutionDDLs);

    if (partitionsDDLInfo.size() == 0) {
        // Step:
        // A.2.2, B.2.2: Snapshot table

        // Step:
        // A.2.2.1, B.2.2.1: Delete data in final table directory
        // A.2.2.2, B.2.2.2: Move data from staging to final table directory
        log.info("Snapshot directory to move: " + orcStagingDataLocation + " to: " + orcDataLocation);
        publishDirectories.put(orcStagingDataLocation, orcDataLocation);

        // Step:
        // A.2.2.3, B.2.2.3: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);
    } else {
        // Step:
        // A.2.3, B.2.3: If partitioned table, move partitions from staging to final table; for all partitions:

        // Step:
        // A.2.3.2, B.2.3.2: Move partition directory
        // Move: orcStagingDataPartitionLocation to: orcFinalDataPartitionLocation
        String orcFinalDataPartitionLocation = orcDataLocation + Path.SEPARATOR + orcStagingDataPartitionDirName;
        Optional<Path> destPartitionLocation = getDestinationPartitionLocation(destinationTableMeta, workUnit,
                conversionEntity.getPartition().get().getName());
        orcFinalDataPartitionLocation = HiveConverterUtils.updatePartitionLocation(orcFinalDataPartitionLocation,
                workUnit, destPartitionLocation);
        log.info("Partition directory to move: " + orcStagingDataPartitionLocation + " to: "
                + orcFinalDataPartitionLocation);
        publishDirectories.put(orcStagingDataPartitionLocation, orcFinalDataPartitionLocation);

        // Step:
        // A.2.3.1, B.2.3.1: Drop if exists partition in final table

        // Step:
        // If destination partition already exists, alter the partition location
        // A.2.3.3, B.2.3.3: Create partition with location (and update storage format if not in ORC already)
        List<String> dropPartitionsDDL = HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase,
                orcTableName, partitionsDMLInfo);
        log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL);
        publishQueries.addAll(dropPartitionsDDL);

        if (workUnit.getPropAsBoolean(HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY,
                DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY)) {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcFinalDataPartitionLocation, partitionsDMLInfo,
                    Optional.<String>absent());
            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);

            // Updating storage format non-transactionally is a stop-gap measure until Hive supports transactionally
            // updating the storage format in the ADD PARTITION command (today it only supports specifying location)
            List<String> updatePartitionStorageFormatDDL = HiveAvroORCQueryGenerator
                    .generateAlterTableOrPartitionStorageFormatDDL(orcTableDatabase, orcTableName,
                            Optional.of(partitionsDMLInfo), ORC_FORMAT);
            log.debug("Update final partition storage format to ORC (if not already in ORC)");
            publishQueries.addAll(updatePartitionStorageFormatDDL);
        } else {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcFinalDataPartitionLocation, partitionsDMLInfo,
                    Optional.fromNullable(ORC_FORMAT));
            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);
        }

        // Step:
        // A.2.3.4, B.2.3.4: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);
    }

    /*
     * Drop the replaced partitions if any. This is required in case the partition being converted is derived from
     * several other partitions. E.g. a daily partition is a replacement of hourly partitions of the same day. When
     * the daily partition is converted to ORC, all its hourly ORC partitions need to be dropped.
     */
    publishQueries.addAll(HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase, orcTableName,
            getDropPartitionsDDLInfo(conversionEntity)));

    /*
     * Create or update view over the ORC table if specified in the config (i.e. wrapper view name is present in config)
     */
    if (wrapperViewName.isPresent()) {
        String viewName = wrapperViewName.get();
        List<String> createOrUpdateViewDDLs = HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL(
                orcTableDatabase, orcTableName, orcTableDatabase, viewName, shouldUpdateView);
        log.debug("Create or update View DDLs: " + createOrUpdateViewDDLs);
        publishQueries.addAll(createOrUpdateViewDDLs);
    }

    HiveAvroORCQueryGenerator.serializePublishCommands(workUnit, publishEntity);
    log.debug("Publish partition entity: " + publishEntity);

    log.debug("Conversion Query " + conversionEntity.getQueries());

    EventWorkunitUtils.setEndDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    return new SingleRecordIterable<>(conversionEntity);
}
From source file:org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset.java
License:Apache License
private List<DatasetDescriptor> createDestDatasets() {
    List<DatasetDescriptor> destDatasets = new ArrayList<>();
    for (String format : getDestFormats()) {
        Optional<ConversionConfig> conversionConfigForFormat = getConversionConfigForFormat(format);
        if (!conversionConfigForFormat.isPresent()) {
            continue;
        }
        String destTable = conversionConfigForFormat.get().getDestinationDbName() + "."
                + conversionConfigForFormat.get().getDestinationTableName();
        DatasetDescriptor dest = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, destTable);
        String destLocation = conversionConfigForFormat.get().getDestinationDataPath() + Path.SEPARATOR + "final";
        dest.addMetadata(DatasetConstants.FS_SCHEME,
                getSourceDataset().getMetadata().get(DatasetConstants.FS_SCHEME));
        dest.addMetadata(DatasetConstants.FS_LOCATION, destLocation);
        destDatasets.add(dest);
    }
    return destDatasets;
}
From source file:org.apache.gobblin.data.management.conversion.hive.materializer.HiveMaterializerFromEntityQueryGenerator.java
License:Apache License
public HiveMaterializerFromEntityQueryGenerator(WorkUnitState workUnitState, boolean supportTargetPartitioning)
        throws IOException {
    super(workUnitState);

    try {
        this.conversionEntity = getConversionEntity(this.workUnit);
    } catch (TException | HiveException ex) {
        throw new IOException(ex);
    }

    this.sourceTable = this.conversionEntity.getTable();
    this.inputDbName = this.sourceTable.getDbName();
    this.inputTableName = this.sourceTable.getTableName();
    this.sourceDataPathIdentifier = this.outputTableMetadata.getSourceDataPathIdentifier();
    this.stagingDataPartitionDirName = HiveConverterUtils.getStagingDataPartitionDirName(conversionEntity,
            sourceDataPathIdentifier);
    this.stagingDataPartitionLocation = stagingDataLocation + Path.SEPARATOR + stagingDataPartitionDirName;
    this.partitionsDDLInfo = Maps.newHashMap();
    this.partitionsDMLInfo = Maps.newHashMap();
    HiveConverterUtils.populatePartitionInfo(conversionEntity, partitionsDDLInfo, partitionsDMLInfo);
    this.supportTargetPartitioning = supportTargetPartitioning;
}
From source file:org.apache.gobblin.data.management.conversion.hive.materializer.HiveMaterializerFromEntityQueryGenerator.java
License:Apache License
/**
 * Returns a QueryBasedHivePublishEntity which includes publish level queries and cleanup commands.
 * @return QueryBasedHivePublishEntity
 * @throws DataConversionException
 */
public QueryBasedHivePublishEntity generatePublishQueries() throws DataConversionException {
    QueryBasedHivePublishEntity publishEntity = new QueryBasedHivePublishEntity();
    List<String> publishQueries = publishEntity.getPublishQueries();
    Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
    List<String> cleanupQueries = publishEntity.getCleanupQueries();
    List<String> cleanupDirectories = publishEntity.getCleanupDirectories();

    String createFinalTableDDL = HiveConverterUtils.generateCreateDuplicateTableDDL(outputDatabaseName,
            stagingTableName, outputTableName, outputDataLocation, Optional.of(outputDatabaseName));
    publishQueries.add(createFinalTableDDL);
    log.debug("Create final table DDL:\n" + createFinalTableDDL);

    if (!this.supportTargetPartitioning || partitionsDDLInfo.size() == 0) {
        log.debug("Snapshot directory to move: " + stagingDataLocation + " to: " + outputDataLocation);
        publishDirectories.put(stagingDataLocation, outputDataLocation);

        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(outputDatabaseName,
                stagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        log.debug("Staging table directory to delete: " + stagingDataLocation);
        cleanupDirectories.add(stagingDataLocation);
    } else {
        String finalDataPartitionLocation = outputDataLocation + Path.SEPARATOR + stagingDataPartitionDirName;
        Optional<Path> destPartitionLocation = HiveConverterUtils.getDestinationPartitionLocation(
                destinationTableMeta, this.workUnitState, conversionEntity.getPartition().get().getName());
        finalDataPartitionLocation = HiveConverterUtils.updatePartitionLocation(finalDataPartitionLocation,
                this.workUnitState, destPartitionLocation);

        log.debug("Partition directory to move: " + stagingDataPartitionLocation + " to: "
                + finalDataPartitionLocation);
        publishDirectories.put(stagingDataPartitionLocation, finalDataPartitionLocation);

        List<String> dropPartitionsDDL = HiveAvroORCQueryGenerator.generateDropPartitionsDDL(outputDatabaseName,
                outputTableName, partitionsDMLInfo);
        log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL);
        publishQueries.addAll(dropPartitionsDDL);

        List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                outputDatabaseName, outputTableName, finalDataPartitionLocation, partitionsDMLInfo,
                Optional.<String>absent());
        log.debug("Create final partition DDL: " + createFinalPartitionDDL);
        publishQueries.addAll(createFinalPartitionDDL);

        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(outputDatabaseName,
                stagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        log.debug("Staging table directory to delete: " + stagingDataLocation);
        cleanupDirectories.add(stagingDataLocation);

        publishQueries.addAll(HiveAvroORCQueryGenerator.generateDropPartitionsDDL(outputDatabaseName,
                outputTableName, AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity)));
    }

    log.info("Publish partition entity: " + publishEntity);
    return publishEntity;
}