Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path SEPARATOR.

Prototype

public static final String SEPARATOR

Document

The directory separator, a slash.
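
A minimal sketch of typical use (the class name, paths, and file names below are illustrative assumptions, not taken from the examples on this page): Path.SEPARATOR is the String "/", commonly used to join path components before constructing a Path.

import org.apache.hadoop.fs.Path;

public class PathSeparatorSketch {
    public static void main(String[] args) {
        // Join a base directory and a (hypothetical) output name with the Hadoop path separator
        String joined = "/tmp" + Path.SEPARATOR + "my-output";
        Path output = new Path(joined);

        System.out.println(Path.SEPARATOR); // prints "/"
        System.out.println(output);         // prints "/tmp/my-output"
    }
}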

Usage

From source file:gobblin.data.management.conversion.hive.validation.ValidationJob.java

License:Apache License

/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 * @return Row counts read from the Hive output of each query.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {

    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();

    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);
                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0L);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    } finally {
        try {
            closer.close();
        } catch (IOException e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }

    return rowCounts;
}

From source file:gobblin.data.management.ConversionHiveTestUtils.java

License:Apache License

public static Schema readSchemaFromJsonFile(String directory, String filename) throws IOException {

    return new Schema.Parser().parse(ConversionHiveTestUtils.class.getClassLoader()
            .getResourceAsStream(StringUtils.removeEnd(directory, Path.SEPARATOR) + Path.SEPARATOR + filename));
}

From source file:gobblin.data.management.ConversionHiveTestUtils.java

License:Apache License

public static String readQueryFromFile(String directory, String filename) throws IOException {
    InputStream is = ConversionHiveTestUtils.class.getClassLoader()
            .getResourceAsStream(StringUtils.removeEnd(directory, Path.SEPARATOR) + Path.SEPARATOR + filename);

    return IOUtils.toString(is, "UTF-8");
}

From source file:gobblin.data.management.copy.hive.HiveDatasetFinder.java

License:Apache License

/**
 * Gets the {@link Config} for this <code>dbAndTable</code>.
 * Cases:
 * <ul>
 * <li>If {@link #configStoreUri} is available it gets the dataset config from the config store at this uri
 * <li>If {@link #configStoreUri} is not available it uses the job config as dataset config
 * <li>If {@link #datasetConfigPrefix} is specified, only configs with this prefix are returned
 * <li>If {@link #datasetConfigPrefix} is not specified, all configs are returned
 * </ul>
 * @param table the hive table of the dataset whose config should be returned
 * @return the {@link Config} for <code>dbAndTable</code>
 */
private Config getDatasetConfig(Table table)
        throws ConfigStoreFactoryDoesNotExistsException, ConfigStoreCreationException, URISyntaxException {

    Config datasetConfig;

    // Config store enabled
    if (this.configStoreUri.isPresent()) {
        datasetConfig = this.configClient.getConfig(
                this.configStoreUri.get() + Path.SEPARATOR + this.configStoreDatasetUriBuilder.apply(table));

    } else {
        // If config store is not enabled, use the job config
        datasetConfig = this.jobConfig;
    }

    return StringUtils.isBlank(this.datasetConfigPrefix) ? datasetConfig
            : ConfigUtils.getConfig(datasetConfig, this.datasetConfigPrefix, ConfigFactory.empty());
}

From source file:gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java

License:Apache License

/**
 * Untars the passed in {@link FileAwareInputStream} to the task's staging directory. Uses the name of the root
 * {@link TarArchiveEntry} in the stream as the directory name for the untarred file. The method also commits the data
 * by moving the file from staging to output directory.
 *
 * @see gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
    this.closer.register(inputStream);

    TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
    final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);
    TarArchiveEntry tarEntry;

    // flush the first entry in the tar, which is just the root directory
    tarEntry = tarIn.getNextTarEntry();
    String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

    log.info("Unarchiving at " + writeAt);

    try {
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {

            // the API tarEntry.getName() is misleading; it actually returns the path of the tarEntry within the tar file
            String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName());
            Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);

            if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
                this.fs.mkdirs(tarEntryStagingPath);
            } else if (!tarEntry.isDirectory()) {
                FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
                final WritableByteChannel outputChannel = Channels.newChannel(out);
                try {
                    StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
                    if (isInstrumentationEnabled()) {
                        copier.withCopySpeedMeter(this.copySpeedMeter);
                    }
                    this.bytesWritten.addAndGet(copier.copy());
                    if (isInstrumentationEnabled()) {
                        log.info("File {}: copied {} bytes, average rate: {} B/s",
                                copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(),
                                this.copySpeedMeter.getMeanRate());
                    } else {
                        log.info("File {} copied.", copyableFile.getOrigin().getPath());
                    }
                } finally {
                    out.close();
                    outputChannel.close();
                }
            }
        }
    } finally {
        tarIn.close();
        inputChannel.close();
        inputStream.close();
    }
}

From source file:gobblin.data.management.hive.HiveConfigClientUtils.java

License:Apache License

/**
 * Get the dataset uri for a hive db and table. The uri is relative to the store uri.
 * @param table the hive table for which a config client uri needs to be built
 */
public static String getDatasetUri(Table table) {
    return HIVE_DATASETS_CONFIG_PREFIX + table.getDbName() + Path.SEPARATOR + table.getTableName();
}

From source file:gobblin.data.management.retention.sql.SqlBasedRetentionPoc.java

License:Apache License

private void insertSnapshot(Path snapshotPath) throws Exception {

    String datasetPath = StringUtils.substringBeforeLast(snapshotPath.toString(), Path.SEPARATOR);
    String snapshotName = StringUtils.substringAfterLast(snapshotPath.toString(), Path.SEPARATOR);
    long ts = Long.parseLong(StringUtils.substringBefore(snapshotName, "-PT-"));
    long recordCount = Long.parseLong(StringUtils.substringAfter(snapshotName, "-PT-"));

    PreparedStatement insert = connection.prepareStatement("INSERT INTO Snapshots VALUES (?, ?, ?, ?, ?)");
    insert.setString(1, datasetPath);
    insert.setString(2, snapshotName);
    insert.setString(3, snapshotPath.toString());
    insert.setTimestamp(4, new Timestamp(ts));
    insert.setLong(5, recordCount);

    insert.executeUpdate();

}

From source file:gobblin.data.management.retention.sql.SqlBasedRetentionPoc.java

License:Apache License

private void insertDailyPartition(Path dailyPartitionPath) throws Exception {

    String datasetPath = StringUtils.substringBeforeLast(dailyPartitionPath.toString(),
            Path.SEPARATOR + "daily");

    DateTime partition = DateTimeFormat.forPattern(DAILY_PARTITION_PATTERN)
            .parseDateTime(StringUtils.substringAfter(dailyPartitionPath.toString(), "daily" + Path.SEPARATOR));

    PreparedStatement insert = connection.prepareStatement("INSERT INTO Daily_Partitions VALUES (?, ?, ?)");
    insert.setString(1, datasetPath);
    insert.setString(2, dailyPartitionPath.toString());
    insert.setTimestamp(3, new Timestamp(partition.getMillis()));

    insert.executeUpdate();

}

From source file:gobblin.runtime.mapreduce.GobblinOutputCommitterTest.java

License:Apache License

@BeforeClass
public void setupWorkUnitFiles() throws IOException {
    this.conf = new Configuration();
    this.fs = FileSystem.getLocal(this.conf);
    this.stagingDirs = Lists.newArrayList();

    // Create a list of WorkUnits to serialize
    WorkUnit wu1 = createAndSetWorkUnit("wu1");
    WorkUnit wu2 = createAndSetWorkUnit("wu2");
    WorkUnit wu3 = createAndSetWorkUnit("wu3");
    WorkUnit wu4 = createAndSetWorkUnit("wu4");

    // Create a MultiWorkUnit to serialize
    MultiWorkUnit mwu1 = MultiWorkUnit.createEmpty();
    mwu1.setProp(ConfigurationKeys.TASK_ID_KEY, System.nanoTime());
    mwu1.addWorkUnits(Arrays.asList(wu3, wu4));

    Path inputDir = new Path(new Path(OUTPUT_PATH, JOB_NAME), "input");

    // Write each WorkUnit to a separate file under inputDir
    Closer closer = Closer.create();
    try {
        wu1.write(closer.register(this.fs
                .create(new Path(inputDir, wu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
                        .suffix("wu"))));
        wu2.write(closer.register(this.fs
                .create(new Path(inputDir, wu2.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
                        .suffix("wu"))));
        mwu1.write(closer.register(this.fs
                .create(new Path(inputDir, mwu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
                        .suffix("mwu"))));
    } finally {
        closer.close();
    }
}

From source file:gobblin.runtime.mapreduce.GobblinOutputCommitterTest.java

License:Apache License

/**
 * Helper method to create a {@link WorkUnit}, set its staging directories, and create the staging directories on the
 * local fs.
 * @param workUnitName is the name of the {@link WorkUnit} to create
 * @return the {@link WorkUnit} that was created
 * @throws IOException
 */
private WorkUnit createAndSetWorkUnit(String workUnitName) throws IOException {
    WorkUnit wu = WorkUnit.createEmpty();
    wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.TASK_ID_KEY, 1, 0),
            System.nanoTime());

    Path wuStagingDir = new Path(OUTPUT_PATH,
            JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR + STAGING_DIR_NAME);
    wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 1, 0),
            wuStagingDir.toString());
    this.fs.mkdirs(wuStagingDir);
    this.stagingDirs.add(wuStagingDir);

    Path wuOutputDir = new Path(OUTPUT_PATH,
            JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR + OUTPUT_DIR_NAME);
    wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
            wuOutputDir.toString());
    this.fs.mkdirs(wuOutputDir);
    this.stagingDirs.add(wuOutputDir);
    return wu;
}