Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

On this page you can find example usages of org.apache.hadoop.fs Path SEPARATOR.

Prototype

String SEPARATOR

To view the source code for org.apache.hadoop.fs Path SEPARATOR, click Source Link.

Document

The directory separator, a slash.

Usage

From source file:gobblin.util.io.StreamUtils.java

License:Apache License

/**
 * Convert a {@link Path} to a {@link String} and make sure it is properly formatted to be recognized as a file
 * by {@link TarArchiveEntry}./* w w  w  .  j  a  v a2s.c  om*/
 */
private static String formatPathToFile(Path path) {
    return StringUtils.removeEnd(path.toString(), Path.SEPARATOR);
}

From source file:gobblin.util.JobLauncherUtilsTest.java

License:Apache License

/**
 * Verifies that {@link JobLauncherUtils#cleanTaskStagingData} removes the writer staging and output paths of
 * both branches of a two-branch work unit state. The paths are derived from the extract's output file path.
 */
@Test
public void testDeleteStagingDataWithOutWriterFilePath() throws IOException {
    FileSystem localFs = FileSystem.getLocal(new Configuration());

    final int numBranches = 2;
    final String[] branchNames = { "fork_0", "fork_1" };

    Path rootDir = new Path("gobblin-test/job-launcher-utils-test");

    // One staging and one output directory per branch, all under the test root
    Path[] stagingDirs = new Path[numBranches];
    for (int branch = 0; branch < numBranches; branch++) {
        stagingDirs[branch] = new Path(rootDir, "staging" + Path.SEPARATOR + branchNames[branch]);
    }
    Path[] outputDirs = new Path[numBranches];
    for (int branch = 0; branch < numBranches; branch++) {
        outputDirs[branch] = new Path(rootDir, "output" + Path.SEPARATOR + branchNames[branch]);
    }

    try {
        Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
        WorkUnitState state = new WorkUnitState(WorkUnit.create(extract));

        state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");

        // Configure the branch-specific name, file system URI and writer directories
        for (int branch = 0; branch < numBranches; branch++) {
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY,
                    numBranches, branch), branchNames[branch]);
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI,
                    numBranches, branch), ConfigurationKeys.LOCAL_FS_URI);
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR,
                    numBranches, branch), stagingDirs[branch].toString());
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR,
                    numBranches, branch), outputDirs[branch].toString());
        }

        // Create the staging paths first, then the output paths
        Path[] stagingPaths = new Path[numBranches];
        for (int branch = 0; branch < numBranches; branch++) {
            stagingPaths[branch] = new Path(stagingDirs[branch], ForkOperatorUtils.getPathForBranch(state,
                    state.getExtract().getOutputFilePath(), numBranches, branch));
            localFs.mkdirs(stagingPaths[branch]);
        }
        Path[] outputPaths = new Path[numBranches];
        for (int branch = 0; branch < numBranches; branch++) {
            outputPaths[branch] = new Path(outputDirs[branch], ForkOperatorUtils.getPathForBranch(state,
                    state.getExtract().getOutputFilePath(), numBranches, branch));
            localFs.mkdirs(outputPaths[branch]);
        }

        JobLauncherUtils.cleanTaskStagingData(state, LoggerFactory.getLogger(JobLauncherUtilsTest.class));

        // None of the created paths may survive the cleanup
        for (Path stagingPath : stagingPaths) {
            Assert.assertFalse(localFs.exists(stagingPath));
        }
        for (Path outputPath : outputPaths) {
            Assert.assertFalse(localFs.exists(outputPath));
        }
    } finally {
        localFs.delete(rootDir, true);
    }
}

From source file:gobblin.util.PathUtils.java

License:Apache License

/**
 * Strips a leading {@link Path#SEPARATOR} from the string form of the given path, if one is present.
 *
 * @param path the path whose leading separator should be removed
 * @return a new {@link Path} built from the stripped string
 */
public static Path withoutLeadingSeparator(Path path) {
    String stripped = StringUtils.removeStart(path.toString(), Path.SEPARATOR);
    return new Path(stripped);
}

From source file:gobblin.writer.AvroHdfsDataWriterTest.java

License:Apache License

/**
 * Writes every record in {@code TestConstants.JSON_RECORDS} through the writer, commits, and then reads the
 * output file back to verify that the three records match what was written.
 *
 * @throws IOException if writing, committing or reading the output file fails
 */
@Test
public void testWrite() throws IOException {
    // Write all test records
    for (String record : TestConstants.JSON_RECORDS) {
        this.writer.write(convertRecord(record));
    }

    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath,
            TestConstants.TEST_FILE_NAME);

    // try-with-resources: the reader must be closed even when one of the assertions below fails
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(outputFile,
            new GenericDatumReader<GenericRecord>(this.schema))) {

        // Read the records back and assert they are identical to the ones written
        GenericRecord user1 = reader.next();
        // Strings are in UTF8, so we have to call toString() here and below
        Assert.assertEquals(user1.get("name").toString(), "Alyssa");
        Assert.assertEquals(user1.get("favorite_number"), 256);
        Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

        GenericRecord user2 = reader.next();
        Assert.assertEquals(user2.get("name").toString(), "Ben");
        Assert.assertEquals(user2.get("favorite_number"), 7);
        Assert.assertEquals(user2.get("favorite_color").toString(), "red");

        GenericRecord user3 = reader.next();
        Assert.assertEquals(user3.get("name").toString(), "Charlie");
        Assert.assertEquals(user3.get("favorite_number"), 68);
        Assert.assertEquals(user3.get("favorite_color").toString(), "blue");
    }
}

From source file:gobblin.writer.AvroHdfsTimePartitionedWriter.java

License:Open Source License

/**
 * Builds the {@link Path} for a timestamp of type long. The dataset name is the parent, and the child is the
 * partition level followed by the timestamp as printed by the {@link #timestampToPathFormatter}.
 *
 * @param timestamp is the timestamp that needs to be converted to a path.
 * @return a {@link Path} based on the value of the timestamp.
 */
private Path getPathForColumnValue(long timestamp) {
    String formattedTimestamp = timestampToPathFormatter.print(timestamp);
    String partitionSubPath = partitionLevel + Path.SEPARATOR + formattedTimestamp;
    return new Path(this.datasetName, partitionSubPath);
}

From source file:gobblin.writer.AvroHdfsTimePartitionedWriterTest.java

License:Open Source License

/**
 * Writes three records with timestamps on three different days and verifies that three "avro" files are created,
 * one under each expected year/month/day directory below the partition-level directory.
 *
 * @throws IOException if writing or committing fails
 */
@Test
public void testWriter() throws IOException {

    // Write three records, each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(this.schema);

    // These timestamps correspond to 2015/01/01, 2015/01/02 and 2015/01/03, in that order
    long[] timestamps = { 1420099200000L, 1420185600000L, 1420272000000L };
    for (long timestamp : timestamps) {
        genericRecordBuilder.set("timestamp", timestamp);
        this.writer.write(genericRecordBuilder.build());
    }

    // Check that the writer reports that 3 records have been written
    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);

    // Check if each file exists, and in the correct location
    File baseOutputDir = new File(OUTPUT_DIR,
            BASE_FILE_PATH + Path.SEPARATOR + ConfigurationKeys.DEFAULT_WRITER_PARTITION_LEVEL);
    Assert.assertTrue(baseOutputDir.exists());

    // One output file per day: 2015/01/01, 2015/01/02 and 2015/01/03
    for (String day : new String[] { "01", "02", "03" }) {
        File outputFile = new File(baseOutputDir,
                "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + day + Path.SEPARATOR + FILE_NAME);
        Assert.assertTrue(outputFile.exists());
    }
}

From source file:gobblin.writer.AvroToParquetHdfsDataWriterTest.java

License:Open Source License

/**
 * Writes every record in {@code TestConstants.JSON_RECORDS} through the writer, commits, and then reads the
 * Parquet output file back to verify that the three records match what was written.
 *
 * @throws IOException if writing, committing or reading the output file fails
 */
@Test
public void testWrite() throws IOException {
    // Write all test records
    for (String record : TestConstants.JSON_RECORDS) {
        writer.write(convertRecord(record));
    }

    Assert.assertEquals(writer.recordsWritten(), 3);

    writer.close();
    writer.commit();

    File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath,
            TestConstants.TEST_FILE_NAME);

    ParquetReader<GenericRecord> reader = ParquetReader
            .builder(new AvroReadSupport<GenericRecord>(), new Path(outputFile.toURI())).build();

    // Close the reader in a finally block so it is released even when an assertion below fails
    try {
        // Read the records back and assert they are identical to the ones written
        GenericRecord user1 = reader.read();
        // Strings are in UTF8, so we have to call toString() here and below
        Assert.assertEquals(user1.get("name").toString(), "Alyssa");
        Assert.assertEquals(user1.get("favorite_number"), 256);
        Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

        GenericRecord user2 = reader.read();
        Assert.assertEquals(user2.get("name").toString(), "Ben");
        Assert.assertEquals(user2.get("favorite_number"), 7);
        Assert.assertEquals(user2.get("favorite_color").toString(), "red");

        GenericRecord user3 = reader.read();
        Assert.assertEquals(user3.get("name").toString(), "Charlie");
        Assert.assertEquals(user3.get("favorite_number"), 68);
        Assert.assertEquals(user3.get("favorite_color").toString(), "blue");
    } finally {
        reader.close();
    }
}

From source file:gobblin.writer.AvroToParquetHdfsTimePartitionedWriterTest.java

License:Open Source License

/**
 * Writes three records with timestamps on three different days and verifies that three "parquet" files are
 * created, one under each expected year/month/day directory below the partition-level directory.
 *
 * @throws IOException if writing or committing fails
 */
@Test
public void testWriter() throws IOException {

    // Write three records, each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);

    // These timestamps correspond to 2015/01/01, 2015/01/02 and 2015/01/03, in that order
    long[] timestamps = { 1420099200000L, 1420185600000L, 1420272000000L };
    for (long timestamp : timestamps) {
        genericRecordBuilder.set("timestamp", timestamp);
        writer.write(genericRecordBuilder.build());
    }

    // Check that the writer reports that 3 records have been written
    Assert.assertEquals(writer.recordsWritten(), 3);

    writer.close();
    writer.commit();

    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "parquet" }, true).size(),
            3);

    // Check if each file exists, and in the correct location
    File baseOutputDir = new File(OUTPUT_DIR,
            BASE_FILE_PATH + Path.SEPARATOR + ConfigurationKeys.DEFAULT_WRITER_PARTITION_LEVEL);
    Assert.assertTrue(baseOutputDir.exists());

    // One output file per day: 2015/01/01, 2015/01/02 and 2015/01/03
    for (String day : new String[] { "01", "02", "03" }) {
        File outputFile = new File(baseOutputDir,
                "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + day + Path.SEPARATOR + FILE_NAME);
        Assert.assertTrue(outputFile.exists());
    }
}

From source file:gobblin.writer.partitioner.TimeBasedAvroWriterPartitionerTest.java

License:Apache License

/**
 * Writes three records with timestamps on three different days and verifies that three "avro" files are created,
 * one under each expected year/month/day directory below the base output directory.
 *
 * @throws IOException if writing or committing fails
 */
@Test
public void testWriter() throws IOException {

    // Write three records, each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(this.schema);

    // These timestamps correspond to 2015/01/01, 2015/01/02 and 2015/01/03, in that order
    long[] timestamps = { 1420099200000L, 1420185600000L, 1420272000000L };
    for (long timestamp : timestamps) {
        genericRecordBuilder.set("timestamp", timestamp);
        this.writer.write(genericRecordBuilder.build());
    }

    // Check that the writer reports that 3 records have been written
    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);

    // Check if each file exists, and in the correct location
    File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH);
    Assert.assertTrue(baseOutputDir.exists());

    // One output file per day: 2015/01/01, 2015/01/02 and 2015/01/03
    for (String day : new String[] { "01", "02", "03" }) {
        File outputFile = new File(baseOutputDir,
                "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + day + Path.SEPARATOR + FILE_NAME);
        Assert.assertTrue(outputFile.exists());
    }
}

From source file:gobblin.writer.SimpleDataWriterTest.java

License:Apache License

/**
 * If the staging file exists, the simple data writer should overwrite its contents.
 *
 * @throws IOException/*from  ww w . j av  a 2 s. c o m*/
 */
@Test
public void testOverwriteExistingStagingFile() throws IOException {
    byte[] randomBytesStage = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 };
    byte[] randomBytesWrite = { 11, 12, 13, 14, 15 };
    Path stagingFile = new Path(TestConstants.TEST_STAGING_DIR + Path.SEPARATOR + this.filePath,
            TestConstants.TEST_FILE_NAME + "." + TestConstants.TEST_WRITER_ID + "." + "tmp");
    Configuration conf = new Configuration();
    // Add all job configuration properties so they are picked up by Hadoop
    for (String key : properties.getPropertyNames()) {
        conf.set(key, properties.getProp(key));
    }
    FileSystem fs = FileSystem.get(URI.create(TestConstants.TEST_FS_URI), conf);

    OutputStream os = fs.create(stagingFile);
    os.write(randomBytesStage);
    os.flush();
    os.close();

    SimpleDataWriter writer = buildSimpleDataWriter();

    writer.write(randomBytesWrite);
    writer.close();
    writer.commit();

    Assert.assertEquals(writer.recordsWritten(), 1);
    Assert.assertEquals(writer.bytesWritten(), randomBytesWrite.length + 1);

    File writeFile = new File(writer.getOutputFilePath());
    int c, i = 0;
    InputStream is = new FileInputStream(writeFile);
    while ((c = is.read()) != -1) {
        if (i == 5) {
            Assert.assertEquals(c, (byte) newLine); // the last byte should be newline
            i++;
            continue;
        }
        Assert.assertEquals(randomBytesWrite[i], c);
        i++;
    }
}