List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR
Declared as: String SEPARATOR
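For reference, Path.SEPARATOR is the path separator string used by Hadoop paths, which always use forward slashes regardless of the local OS. A minimal standalone sketch:

import org.apache.hadoop.fs.Path;

public class PathSeparatorDemo {
    public static void main(String[] args) {
        // Path.SEPARATOR is the String "/"; Path.SEPARATOR_CHAR is the char '/'.
        System.out.println(Path.SEPARATOR);      // prints: /
        // Typical use: joining path segments built as plain strings.
        String joined = "base" + Path.SEPARATOR + "child";
        System.out.println(new Path(joined));    // prints: base/child
    }
}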
From source file: org.apache.gobblin.data.management.conversion.hive.task.HiveConverterUtils.java
License: Apache License
/**
 * Get the final table location of format: <final table location>/final
 *
 * @return final table location.
 */
public static String getOutputDataLocation(String outputDataLocation) {
    return outputDataLocation + Path.SEPARATOR + PUBLISHED_TABLE_SUBDIRECTORY;
}
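A hypothetical call, assuming PUBLISHED_TABLE_SUBDIRECTORY is "final" as the Javadoc's format string suggests:

// Hypothetical usage (PUBLISHED_TABLE_SUBDIRECTORY assumed to be "final"):
String out = HiveConverterUtils.getOutputDataLocation("/data/hive/events");
// out == "/data/hive/events/final"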
From source file: org.apache.gobblin.data.management.conversion.hive.task.HiveConverterUtils.java
License: Apache License
/**
 * Get the staging table location of format: <final table location>/<staging table name>
 *
 * @param outputDataLocation output table data location.
 * @param stagingTableName name of the staging table.
 * @return staging table location.
 */
public static String getStagingDataLocation(String outputDataLocation, String stagingTableName) {
    return outputDataLocation + Path.SEPARATOR + stagingTableName;
}
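A hypothetical call with an illustrative staging table name:

// Hypothetical usage; the table name is illustrative only:
String staging = HiveConverterUtils.getStagingDataLocation("/data/hive/events", "events_staging_123");
// staging == "/data/hive/events/events_staging_123"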
From source file: org.apache.gobblin.data.management.conversion.hive.validation.ValidationJob.java
License: Apache License
/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 *
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
    justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {

    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();

    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);

            try {
                if (this.hiveSettings.size() > 0) {
                    hiveJdbcConnector
                        .executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
                }
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                    "SET hive.auto.convert.join=false", query);

                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }

                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                        new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0L);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        log.warn("Execution failed for query set " + queries.toString(), e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }

    return rowCounts;
}
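To make the rewriting step concrete, here is a sketch of what a query looks like before and after it is wrapped; the table name is hypothetical and the UUID is elided:

// Hypothetical validation query and its rewritten form:
String query = "SELECT COUNT(*) FROM db.tbl WHERE ds = '2021-01-01'";
String rewritten = "INSERT OVERWRITE DIRECTORY '/tmp/hiveConversionValidationOutput_<uuid>' " + query;
// Hive writes the single COUNT(*) value into a file under that directory,
// which the method above then reads back and parses as a long.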
From source file: org.apache.gobblin.data.management.copy.hive.HiveDatasetFinder.java
License: Apache License
/**
 * Gets the {@link Config} for this <code>dbAndTable</code>.
 * Cases:
 * <ul>
 * <li>If {@link #configStoreUri} is available, the dataset config is read from the config store at this URI
 * <li>If {@link #configStoreUri} is not available, the job config is used as the dataset config
 * <li>If {@link #datasetConfigPrefix} is specified, only configs with this prefix are returned
 * <li>If {@link #datasetConfigPrefix} is not specified, all configs are returned
 * </ul>
 * @param table the table of the dataset whose config to get
 * @return the {@link Config} for <code>dbAndTable</code>
 */
private Config getDatasetConfig(Table table)
    throws ConfigStoreFactoryDoesNotExistsException, ConfigStoreCreationException, URISyntaxException {

    Config datasetConfig;
    Optional<Config> runtimeConfig = ConfigClientUtils.getOptionalRuntimeConfig(properties);

    // Config store enabled
    if (this.configStoreUri.isPresent()) {
        if (runtimeConfig.isPresent()) {
            datasetConfig = this.configClient.getConfig(
                this.configStoreUri.get() + Path.SEPARATOR + this.configStoreDatasetUriBuilder.apply(table),
                runtimeConfig);
        } else {
            datasetConfig = this.configClient.getConfig(
                this.configStoreUri.get() + Path.SEPARATOR + this.configStoreDatasetUriBuilder.apply(table));
        }
    // If config store is not enabled, use the job config
    } else {
        datasetConfig = this.jobConfig;
    }

    return StringUtils.isBlank(this.datasetConfigPrefix) ? datasetConfig
        : ConfigUtils.getConfig(datasetConfig, this.datasetConfigPrefix, ConfigFactory.empty());
}
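The prefix scoping at the end can be sketched with plain Typesafe Config (Gobblin's ConfigUtils.getConfig additionally applies the empty-config fallback, but the idea is the same); the keys here are hypothetical:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

// Hypothetical keys illustrating prefix scoping:
Config full = ConfigFactory.parseString("gobblin.dataset.copy.enabled = true\nother.key = 1");
Config scoped = full.getConfig("gobblin.dataset");
// scoped contains copy.enabled = true; other.key is filtered out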
From source file: org.apache.gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java
License: Apache License
/**
 * Untars the passed in {@link FileAwareInputStream} to the task's staging directory. Uses the name of the root
 * {@link TarArchiveEntry} in the stream as the directory name for the untarred file. The method also commits the
 * data by moving the file from the staging to the output directory.
 *
 * @see org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(org.apache.gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile, FileAwareInputStream record)
    throws IOException {
    this.closer.register(inputStream);

    TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
    final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);
    TarArchiveEntry tarEntry;

    // Skip the first entry in the tar, which is just the root directory
    tarEntry = tarIn.getNextTarEntry();
    String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

    log.info("Unarchiving at " + writeAt);

    try {
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {
            // tarEntry.getName() is misleading: it is actually the path of the entry within the tar file
            String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName());
            Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);
            if (!FileUtils.isSubPath(writeAt.getParent(), tarEntryStagingPath)) {
                throw new IOException(String.format(
                    "Extracted file: %s is trying to write outside of output directory: %s",
                    tarEntryStagingPath, writeAt.getParent()));
            }

            if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
                this.fs.mkdirs(tarEntryStagingPath);
            } else if (!tarEntry.isDirectory()) {
                FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
                final WritableByteChannel outputChannel = Channels.newChannel(out);
                try {
                    StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
                    if (isInstrumentationEnabled()) {
                        copier.withCopySpeedMeter(this.copySpeedMeter);
                    }
                    this.bytesWritten.addAndGet(copier.copy());
                    if (isInstrumentationEnabled()) {
                        log.info("File {}: copied {} bytes, average rate: {} B/s",
                            copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(),
                            this.copySpeedMeter.getMeanRate());
                    } else {
                        log.info("File {} copied.", copyableFile.getOrigin().getPath());
                    }
                } finally {
                    out.close();
                    outputChannel.close();
                }
            }
        }
    } finally {
        tarIn.close();
        inputChannel.close();
        inputStream.close();
    }
}
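To see how the tar's root entry is remapped onto the write location, a small sketch with hypothetical paths:

import org.apache.hadoop.fs.Path;

public class TarEntryPathDemo {
    public static void main(String[] args) {
        // Hypothetical values mirroring the remapping above.
        Path writeAt = new Path("/staging/task1/archive");
        String tarEntryRootName = "archive-root";
        String entryName = "archive-root/sub/file.txt";

        // The tar's root directory name is replaced with writeAt's name,
        // so every entry is staged under writeAt.
        String newTarEntryPath = entryName.replace(tarEntryRootName, writeAt.getName());
        System.out.println(new Path(writeAt.getParent(), newTarEntryPath));
        // prints: /staging/task1/archive/sub/file.txt
    }
}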
From source file: org.apache.gobblin.source.DatePartitionedNestedRetriever.java
License: Apache License
private String constructPartitionPath(DateTime date) {
    StringBuilder pathBuilder = new StringBuilder();

    if (!this.sourcePartitionPrefix.isEmpty()) {
        pathBuilder.append(this.sourcePartitionPrefix);
        pathBuilder.append(Path.SEPARATOR);
    }

    pathBuilder.append(this.partitionPatternFormatter.print(date));

    if (!this.sourcePartitionSuffix.isEmpty()) {
        pathBuilder.append(Path.SEPARATOR);
        pathBuilder.append(this.sourcePartitionSuffix);
    }

    return pathBuilder.toString();
}
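With hypothetical settings (prefix "events", pattern "yyyy/MM/dd", suffix "late"), the method would produce a path like this:

import org.apache.hadoop.fs.Path;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;

// Hypothetical configuration values; mirrors constructPartitionPath above.
String partition = "events" + Path.SEPARATOR
    + DateTimeFormat.forPattern("yyyy/MM/dd").print(new DateTime(2021, 1, 15, 0, 0))
    + Path.SEPARATOR + "late";
// partition == "events/2021/01/15/late"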
From source file: org.apache.gobblin.source.extractor.DatePartitionedAvroFileExtractorTest.java
License: Apache License
@Test
public void testJobStateNotCopiedToWorkUnit() {
    DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource();

    SourceState state = new SourceState();
    state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
    state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2);
    state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN);
    state.setProp("date.partitioned.source.min.watermark.value",
        DateTimeFormat.forPattern(DATE_PATTERN).print(this.startDateTime.minusMinutes(1)));
    state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY);
    state.setProp("date.partitioned.source.partition.prefix", PREFIX);
    state.setProp("date.partitioned.source.partition.suffix", SUFFIX);

    String dummyKey = "dummy.job.config";
    state.setProp(dummyKey, "dummy");

    List<WorkUnit> workunits = source.getWorkunits(state);

    Assert.assertEquals(workunits.size(), 4);
    for (WorkUnit wu : workunits) {
        if (wu instanceof MultiWorkUnit) {
            for (WorkUnit workUnit : ((MultiWorkUnit) wu).getWorkUnits()) {
                Assert.assertFalse(workUnit.contains(dummyKey));
            }
        } else {
            Assert.assertFalse(wu.contains(dummyKey));
        }
    }
}
From source file: org.apache.gobblin.source.extractor.DatePartitionedAvroFileExtractorTest.java
License: Apache License
@Test
public void testReadPartitionsByMinuteWithLeadtime() throws IOException, DataRecordException {
    DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource();

    SourceState state = new SourceState();
    state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
    state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY);
    state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2);
    state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN);
    state.setProp("date.partitioned.source.min.watermark.value",
        DateTimeFormat.forPattern(DATE_PATTERN).print(this.startDateTime.minusMinutes(1)));
    state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY);
    state.setProp("date.partitioned.source.partition.prefix", PREFIX);
    state.setProp("date.partitioned.source.partition.suffix", SUFFIX);
    state.setProp("date.partitioned.source.partition.lead_time.size", "3");
    state.setProp("date.partitioned.source.partition.lead_time.granularity", "HOUR");

    /*
     * Since the lead time is 3 hours, only the first WorkUnit (which is 6 hours old; the rest are
     * 2 hours old) should get picked up.
     */
    List<WorkUnit> workunits = source.getWorkunits(state);

    Assert.assertEquals(workunits.size(), 1);
    verifyWorkUnits(workunits, workunits.size());
}
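A sketch of the lead-time cutoff being tested; the variables here are illustrative, not the Source's actual field names:

import org.joda.time.DateTime;

// Illustrative lead-time arithmetic (3-hour lead time as configured above):
DateTime now = DateTime.now();
DateTime cutoff = now.minusHours(3);
boolean sixHoursOldEligible = now.minusHours(6).isBefore(cutoff); // true
boolean twoHoursOldEligible = now.minusHours(2).isBefore(cutoff); // false
// Hence only the one 6-hour-old partition becomes a WorkUnit.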
From source file: org.apache.gobblin.util.WriterUtils.java
License: Apache License
/**
 * Creates the {@link Path} for case {@link WriterFilePathType#NAMESPACE_TABLE} from the configurations
 * {@link ConfigurationKeys#EXTRACT_NAMESPACE_NAME_KEY} and {@link ConfigurationKeys#EXTRACT_TABLE_NAME_KEY}.
 *
 * @param state the {@link State} carrying the extract configuration
 * @return a path of the form <namespace with dots replaced by slashes>/<table name>
 */
public static Path getNamespaceTableWriterFilePath(State state) {
    Preconditions.checkArgument(state.contains(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY));
    Preconditions.checkArgument(state.contains(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY));

    String namespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY).replaceAll("\\.", Path.SEPARATOR);
    return new Path(namespace + Path.SEPARATOR + state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY));
}
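A hypothetical input/output pair for this transformation:

import org.apache.hadoop.fs.Path;

// Hypothetical namespace and table name:
String namespace = "com.example.tracking".replaceAll("\\.", Path.SEPARATOR);
Path writerFilePath = new Path(namespace + Path.SEPARATOR + "PageViewEvent");
// writerFilePath == com/example/tracking/PageViewEvent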
From source file: org.apache.gobblin.writer.AvroHdfsDataWriterTest.java
License: Apache License
@Test
public void testWrite() throws IOException {
    // Write all test records
    for (String record : TestConstants.JSON_RECORDS) {
        this.writer.write(convertRecord(record));
    }

    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath,
        TestConstants.TEST_FILE_NAME);
    DataFileReader<GenericRecord> reader = new DataFileReader<>(outputFile,
        new GenericDatumReader<GenericRecord>(this.schema));

    // Read the records back and assert they are identical to the ones written
    GenericRecord user1 = reader.next();
    // Strings are in UTF8, so we have to call toString() here and below
    Assert.assertEquals(user1.get("name").toString(), "Alyssa");
    Assert.assertEquals(user1.get("favorite_number"), 256);
    Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

    GenericRecord user2 = reader.next();
    Assert.assertEquals(user2.get("name").toString(), "Ben");
    Assert.assertEquals(user2.get("favorite_number"), 7);
    Assert.assertEquals(user2.get("favorite_color").toString(), "red");

    GenericRecord user3 = reader.next();
    Assert.assertEquals(user3.get("name").toString(), "Charlie");
    Assert.assertEquals(user3.get("favorite_number"), 68);
    Assert.assertEquals(user3.get("favorite_color").toString(), "blue");

    reader.close();

    FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY));
    Assert.assertEquals(metrics.fileInfos.size(), 1);
    FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();

    Assert.assertEquals(fileInfo.fileName, TestConstants.TEST_FILE_NAME);
    Assert.assertEquals(fileInfo.numRecords, 3);
    Assert.assertNull(metrics.partitionInfo.partitionKey);
    Assert.assertEquals(metrics.partitionInfo.branchId, 0);
}
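For orientation, the output file the test opens is composed as follows; the constant values here are hypothetical, not the actual TestConstants:

import java.io.File;
import org.apache.hadoop.fs.Path;

// Hypothetical constants: TEST_OUTPUT_DIR = "test-output",
// filePath = "namespace/table", TEST_FILE_NAME = "part.task_test.avro".
File outputFile = new File("test-output" + Path.SEPARATOR + "namespace/table", "part.task_test.avro");
// -> test-output/namespace/table/part.task_test.avro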