List of usage examples for org.apache.hadoop.fs Path SEPARATOR
String SEPARATOR
To view the source code for org.apache.hadoop.fs Path SEPARATOR, click the Source Link.
From source file:gobblin.util.io.StreamUtils.java
License:Apache License
/** * Convert a {@link Path} to a {@link String} and make sure it is properly formatted to be recognized as a file * by {@link TarArchiveEntry}./* w w w . j a v a2s.c om*/ */ private static String formatPathToFile(Path path) { return StringUtils.removeEnd(path.toString(), Path.SEPARATOR); }
From source file:gobblin.util.JobLauncherUtilsTest.java
License:Apache License
/**
 * Creates staging and output directories for two fork branches on the local file system, runs
 * {@link JobLauncherUtils#cleanTaskStagingData}, and checks that all four writer paths are gone afterwards.
 * The writer paths are derived from the extract's output file path.
 *
 * @throws IOException if a file system operation fails
 */
@Test
public void testDeleteStagingDataWithOutWriterFilePath() throws IOException {
  final int numBranches = 2;

  FileSystem fs = FileSystem.getLocal(new Configuration());

  String[] branchNames = { "fork_0", "fork_1" };
  String namespace = "gobblin.test";
  String tableName = "test-table";

  Path rootDir = new Path("gobblin-test/job-launcher-utils-test");

  // Per-branch staging and output directories, indexed by branch id
  Path[] writerStagingDirs = new Path[numBranches];
  for (int branch = 0; branch < numBranches; branch++) {
    writerStagingDirs[branch] = new Path(rootDir, "staging" + Path.SEPARATOR + branchNames[branch]);
  }
  Path[] writerOutputDirs = new Path[numBranches];
  for (int branch = 0; branch < numBranches; branch++) {
    writerOutputDirs[branch] = new Path(rootDir, "output" + Path.SEPARATOR + branchNames[branch]);
  }

  try {
    SourceState sourceState = new SourceState();
    Extract extract = new Extract(sourceState, TableType.APPEND_ONLY, namespace, tableName);
    WorkUnitState state = new WorkUnitState(WorkUnit.create(extract));

    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");
    for (int branch = 0; branch < numBranches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY, numBranches, branch),
          branchNames[branch]);
    }
    for (int branch = 0; branch < numBranches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branch),
          ConfigurationKeys.LOCAL_FS_URI);
    }
    for (int branch = 0; branch < numBranches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, numBranches, branch),
          writerStagingDirs[branch].toString());
    }
    for (int branch = 0; branch < numBranches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, numBranches, branch),
          writerOutputDirs[branch].toString());
    }

    // Create the writer paths: staging for every branch first, then output for every branch
    Path[] writerPaths = new Path[2 * numBranches];
    int next = 0;
    for (Path[] parentDirs : new Path[][] { writerStagingDirs, writerOutputDirs }) {
      for (int branch = 0; branch < numBranches; branch++) {
        Path writerPath = new Path(parentDirs[branch],
            ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), numBranches, branch));
        fs.mkdirs(writerPath);
        writerPaths[next++] = writerPath;
      }
    }

    JobLauncherUtils.cleanTaskStagingData(state, LoggerFactory.getLogger(JobLauncherUtilsTest.class));

    for (Path writerPath : writerPaths) {
      Assert.assertFalse(fs.exists(writerPath));
    }
  } finally {
    // Always remove the test directory tree, whether or not the assertions passed
    fs.delete(rootDir, true);
  }
}
From source file:gobblin.util.PathUtils.java
License:Apache License
/** * Removes the leading slash if present. * */// ww w . j a v a 2 s . c om public static Path withoutLeadingSeparator(Path path) { return new Path(StringUtils.removeStart(path.toString(), Path.SEPARATOR)); }
From source file:gobblin.writer.AvroHdfsDataWriterTest.java
License:Apache License
@Test public void testWrite() throws IOException { // Write all test records for (String record : TestConstants.JSON_RECORDS) { this.writer.write(convertRecord(record)); }//from w w w .j av a2s.c o m Assert.assertEquals(this.writer.recordsWritten(), 3); this.writer.close(); this.writer.commit(); File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME); DataFileReader<GenericRecord> reader = new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>(this.schema)); // Read the records back and assert they are identical to the ones written GenericRecord user1 = reader.next(); // Strings are in UTF8, so we have to call toString() here and below Assert.assertEquals(user1.get("name").toString(), "Alyssa"); Assert.assertEquals(user1.get("favorite_number"), 256); Assert.assertEquals(user1.get("favorite_color").toString(), "yellow"); GenericRecord user2 = reader.next(); Assert.assertEquals(user2.get("name").toString(), "Ben"); Assert.assertEquals(user2.get("favorite_number"), 7); Assert.assertEquals(user2.get("favorite_color").toString(), "red"); GenericRecord user3 = reader.next(); Assert.assertEquals(user3.get("name").toString(), "Charlie"); Assert.assertEquals(user3.get("favorite_number"), 68); Assert.assertEquals(user3.get("favorite_color").toString(), "blue"); reader.close(); }
From source file:gobblin.writer.AvroHdfsTimePartitionedWriter.java
License:Open Source License
/**
 * Builds the {@link Path} for a record timestamp by formatting it with the {@link #timestampToPathFormatter}
 * and placing the result under the partition level of the dataset.
 *
 * @param timestamp the timestamp (as a long) to convert to a path
 * @return a {@link Path} derived from the dataset name, the partition level and the formatted timestamp
 */
private Path getPathForColumnValue(long timestamp) {
  String formattedTimestamp = timestampToPathFormatter.print(timestamp);
  String partitionPath = partitionLevel + Path.SEPARATOR + formattedTimestamp;
  return new Path(this.datasetName, partitionPath);
}
From source file:gobblin.writer.AvroHdfsTimePartitionedWriterTest.java
License:Open Source License
@Test public void testWriter() throws IOException { // Write three records, each should be written to a different file GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(this.schema); // This timestamp corresponds to 2015/01/01 genericRecordBuilder.set("timestamp", 1420099200000l); this.writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/02 genericRecordBuilder.set("timestamp", 1420185600000l); this.writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/03 genericRecordBuilder.set("timestamp", 1420272000000l); this.writer.write(genericRecordBuilder.build()); // Check that the writer reports that 3 records have been written Assert.assertEquals(this.writer.recordsWritten(), 3); this.writer.close(); this.writer.commit(); // Check that 3 files were created Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3); // Check if each file exists, and in the correct location File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH + Path.SEPARATOR + ConfigurationKeys.DEFAULT_WRITER_PARTITION_LEVEL); Assert.assertTrue(baseOutputDir.exists()); File outputDir20150101 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150101.exists()); File outputDir20150102 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150102.exists()); File outputDir20150103 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150103.exists()); }
From source file:gobblin.writer.AvroToParquetHdfsDataWriterTest.java
License:Open Source License
@Test public void testWrite() throws IOException { // Write all test records for (String record : TestConstants.JSON_RECORDS) { writer.write(convertRecord(record)); }// w w w . ja v a 2s . c o m Assert.assertEquals(writer.recordsWritten(), 3); writer.close(); writer.commit(); File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME); ParquetReader<GenericRecord> reader = ParquetReader .builder(new AvroReadSupport<GenericRecord>(), new Path(outputFile.toURI())).build(); // Read the records back and assert they are identical to the ones written GenericRecord user1 = reader.read(); // Strings are in UTF8, so we have to call toString() here and below Assert.assertEquals(user1.get("name").toString(), "Alyssa"); Assert.assertEquals(user1.get("favorite_number"), 256); Assert.assertEquals(user1.get("favorite_color").toString(), "yellow"); GenericRecord user2 = reader.read(); Assert.assertEquals(user2.get("name").toString(), "Ben"); Assert.assertEquals(user2.get("favorite_number"), 7); Assert.assertEquals(user2.get("favorite_color").toString(), "red"); GenericRecord user3 = reader.read(); Assert.assertEquals(user3.get("name").toString(), "Charlie"); Assert.assertEquals(user3.get("favorite_number"), 68); Assert.assertEquals(user3.get("favorite_color").toString(), "blue"); reader.close(); }
From source file:gobblin.writer.AvroToParquetHdfsTimePartitionedWriterTest.java
License:Open Source License
@Test public void testWriter() throws IOException { // Write three records, each should be written to a different file GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema); // This timestamp corresponds to 2015/01/01 genericRecordBuilder.set("timestamp", 1420099200000l); writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/02 genericRecordBuilder.set("timestamp", 1420185600000l); writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/03 genericRecordBuilder.set("timestamp", 1420272000000l); writer.write(genericRecordBuilder.build()); // Check that the writer reports that 3 records have been written Assert.assertEquals(writer.recordsWritten(), 3); writer.close();//from w w w . j a v a 2s . com writer.commit(); // Check that 3 files were created Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "parquet" }, true).size(), 3); // Check if each file exists, and in the correct location File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH + Path.SEPARATOR + ConfigurationKeys.DEFAULT_WRITER_PARTITION_LEVEL); Assert.assertTrue(baseOutputDir.exists()); File outputDir20150101 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150101.exists()); File outputDir20150102 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150102.exists()); File outputDir20150103 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150103.exists()); }
From source file:gobblin.writer.partitioner.TimeBasedAvroWriterPartitionerTest.java
License:Apache License
@Test public void testWriter() throws IOException { // Write three records, each should be written to a different file GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(this.schema); // This timestamp corresponds to 2015/01/01 genericRecordBuilder.set("timestamp", 1420099200000l); this.writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/02 genericRecordBuilder.set("timestamp", 1420185600000l); this.writer.write(genericRecordBuilder.build()); // This timestamp corresponds to 2015/01/03 genericRecordBuilder.set("timestamp", 1420272000000l); this.writer.write(genericRecordBuilder.build()); // Check that the writer reports that 3 records have been written Assert.assertEquals(this.writer.recordsWritten(), 3); this.writer.close(); this.writer.commit(); // Check that 3 files were created Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3); // Check if each file exists, and in the correct location File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH); Assert.assertTrue(baseOutputDir.exists()); File outputDir20150101 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150101.exists()); File outputDir20150102 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150102.exists()); File outputDir20150103 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME); Assert.assertTrue(outputDir20150103.exists()); }
From source file:gobblin.writer.SimpleDataWriterTest.java
License:Apache License
/** * If the staging file exists, the simple data writer should overwrite its contents. * * @throws IOException/*from ww w . j av a 2 s. c o m*/ */ @Test public void testOverwriteExistingStagingFile() throws IOException { byte[] randomBytesStage = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; byte[] randomBytesWrite = { 11, 12, 13, 14, 15 }; Path stagingFile = new Path(TestConstants.TEST_STAGING_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME + "." + TestConstants.TEST_WRITER_ID + "." + "tmp"); Configuration conf = new Configuration(); // Add all job configuration properties so they are picked up by Hadoop for (String key : properties.getPropertyNames()) { conf.set(key, properties.getProp(key)); } FileSystem fs = FileSystem.get(URI.create(TestConstants.TEST_FS_URI), conf); OutputStream os = fs.create(stagingFile); os.write(randomBytesStage); os.flush(); os.close(); SimpleDataWriter writer = buildSimpleDataWriter(); writer.write(randomBytesWrite); writer.close(); writer.commit(); Assert.assertEquals(writer.recordsWritten(), 1); Assert.assertEquals(writer.bytesWritten(), randomBytesWrite.length + 1); File writeFile = new File(writer.getOutputFilePath()); int c, i = 0; InputStream is = new FileInputStream(writeFile); while ((c = is.read()) != -1) { if (i == 5) { Assert.assertEquals(c, (byte) newLine); // the last byte should be newline i++; continue; } Assert.assertEquals(randomBytesWrite[i], c); i++; } }