List of usage examples for org.apache.hadoop.fs.FileSystem#mkdirs
public boolean mkdirs(Path f) throws IOException
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/** * Renames from to to if to doesn't exist in a non-thread-safe way. * * @param fs filesystem where rename will be executed. * @param from origin {@link Path}./*from ww w . jav a2 s.co m*/ * @param to target {@link Path}. * @return true if rename succeeded, false if the target already exists. * @throws IOException if rename failed for reasons other than target exists. */ public static boolean unsafeRenameIfNotExists(FileSystem fs, Path from, Path to) throws IOException { if (!fs.exists(to)) { if (!fs.exists(to.getParent())) { fs.mkdirs(to.getParent()); } if (!renamePathHandleLocalFSRace(fs, from, to)) { if (!fs.exists(to)) { throw new IOException(String.format("Failed to rename %s to %s.", from, to)); } return false; } return true; } return false; }
From source file:gobblin.util.HadoopUtilsTest.java
License:Apache License
@Test public void testRenameRecursively() throws Exception { final Path hadoopUtilsTestDir = new Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir"); FileSystem fs = FileSystem.getLocal(new Configuration()); try {//from w w w . ja v a 2 s . c o m fs.mkdirs(hadoopUtilsTestDir); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRename/a/b/c")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging/a/b/c")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging/a/b/c/e")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging/a/b/c/t1.txt")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging/a/b/c/e/t2.txt")); HadoopUtils.renameRecursively(fs, new Path(hadoopUtilsTestDir, "testRenameStaging"), new Path(hadoopUtilsTestDir, "testRename")); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testRename/a/b/c/t1.txt"))); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testRename/a/b/c/e/t2.txt"))); } finally { fs.delete(hadoopUtilsTestDir, true); } }
From source file:gobblin.util.HadoopUtilsTest.java
License:Apache License
@Test(groups = { "performance" }) public void testRenamePerformance() throws Exception { FileSystem fs = Mockito.mock(FileSystem.class); Path sourcePath = new Path("/source"); Path s1 = new Path(sourcePath, "d1"); FileStatus[] sourceStatuses = new FileStatus[10000]; FileStatus[] targetStatuses = new FileStatus[1000]; for (int i = 0; i < sourceStatuses.length; i++) { sourceStatuses[i] = getFileStatus(new Path(s1, "path" + i), false); }//from w w w.jav a 2s . c om for (int i = 0; i < targetStatuses.length; i++) { targetStatuses[i] = getFileStatus(new Path(s1, "path" + i), false); } Mockito.when(fs.getUri()).thenReturn(new URI("file:///")); Mockito.when(fs.getFileStatus(sourcePath)).thenAnswer(getDelayedAnswer(getFileStatus(sourcePath, true))); Mockito.when(fs.exists(sourcePath)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.listStatus(sourcePath)) .thenAnswer(getDelayedAnswer(new FileStatus[] { getFileStatus(s1, true) })); Mockito.when(fs.exists(s1)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.listStatus(s1)).thenAnswer(getDelayedAnswer(sourceStatuses)); Path target = new Path("/target"); Path s1Target = new Path(target, "d1"); Mockito.when(fs.exists(target)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.exists(s1Target)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.mkdirs(Mockito.any(Path.class))).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.rename(Mockito.any(Path.class), Mockito.any(Path.class))) .thenAnswer(getDelayedAnswer(true)); HadoopUtils.renameRecursively(fs, sourcePath, target); }
From source file:gobblin.util.HadoopUtilsTest.java
License:Apache License
@Test public void testSafeRenameRecursively() throws Exception { final Logger log = LoggerFactory.getLogger("HadoopUtilsTest.testSafeRenameRecursively"); final Path hadoopUtilsTestDir = new Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir"); final FileSystem fs = FileSystem.getLocal(new Configuration()); try {//from w ww . j a v a 2s .c o m // do many iterations to catch rename race conditions for (int i = 0; i < 100; i++) { fs.mkdirs(hadoopUtilsTestDir); fs.mkdirs(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging1/a/b/c")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging1/a/b/c/e")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging1/a/b/c/t1.txt")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging1/a/b/c/e/t2.txt")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging2/a/b/c")); fs.mkdirs(new Path(hadoopUtilsTestDir, "testRenameStaging2/a/b/c/e")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging2/a/b/c/t3.txt")); fs.create(new Path(hadoopUtilsTestDir, "testRenameStaging2/a/b/c/e/t4.txt")); ExecutorService executorService = Executors.newFixedThreadPool(2); final Throwable[] runnableErrors = { null, null }; Future<?> renameFuture = executorService.submit(new Runnable() { @Override public void run() { try { HadoopUtils.renameRecursively(fs, new Path(hadoopUtilsTestDir, "testRenameStaging1"), new Path(hadoopUtilsTestDir, "testSafeRename")); } catch (Throwable e) { log.error("Rename error: " + e, e); runnableErrors[0] = e; } } }); Future<?> safeRenameFuture = executorService.submit(new Runnable() { @Override public void run() { try { HadoopUtils.safeRenameRecursively(fs, new Path(hadoopUtilsTestDir, "testRenameStaging2"), new Path(hadoopUtilsTestDir, "testSafeRename")); } catch (Throwable e) { log.error("Safe rename error: " + e, e); runnableErrors[1] = e; } } }); // Wait for the executions to complete renameFuture.get(10, TimeUnit.SECONDS); 
safeRenameFuture.get(10, TimeUnit.SECONDS); executorService.shutdownNow(); Assert.assertNull(runnableErrors[0], "Runnable 0 error: " + runnableErrors[0]); Assert.assertNull(runnableErrors[1], "Runnable 1 error: " + runnableErrors[1]); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c/t1.txt"))); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c/t3.txt"))); Assert.assertTrue(!fs.exists(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c/e/e/t2.txt"))); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c/e/t2.txt"))); Assert.assertTrue(fs.exists(new Path(hadoopUtilsTestDir, "testSafeRename/a/b/c/e/t4.txt"))); fs.delete(hadoopUtilsTestDir, true); } } finally { fs.delete(hadoopUtilsTestDir, true); } }
From source file:gobblin.util.HeapDumpForTaskUtils.java
License:Apache License
/**
 * Generate the dumpScript, which is used when OOM error is thrown during task execution.
 * The generated script puts the .hprof file into the DUMP_FOLDER located in the same directory
 * as the dumpScript.
 *
 * User needs to add the following options to the task java.opts:
 *
 * -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./heapFileName.hprof -XX:OnOutOfMemoryError=./dumpScriptFileName
 *
 * If the script already exists, nothing is written. If writing fails, the partially written
 * script is removed (best effort) and the original exception is rethrown.
 *
 * @param dumpScript The path to the dumpScript, which needs to be added to the Distributed cache.
 * To use it, simply put the path of dumpScript to the gobblin config: job.hdfs.files.
 * @param fs File system
 * @param heapFileName the name of the heap dump file that the script uploads.
 * @param chmod chmod for the dump script. For hdfs file, e.g, "hadoop fs -chmod 755"
 * @throws IOException if the script cannot be written or the chmod command cannot be started
 */
public static void generateDumpScript(Path dumpScript, FileSystem fs, String heapFileName, String chmod)
    throws IOException {
  if (fs.exists(dumpScript)) {
    LOG.info("Heap dump script already exists: " + dumpScript);
    return;
  }

  try (BufferedWriter scriptWriter = new BufferedWriter(
      new OutputStreamWriter(fs.create(dumpScript), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
    Path dumpDir = new Path(dumpScript.getParent(), DUMP_FOLDER);
    if (!fs.exists(dumpDir)) {
      fs.mkdirs(dumpDir);
    }

    // NOTE(review): "${PWD//\//_}" is a bash-style substitution while the shebang is /bin/sh;
    // confirm the shell on the task hosts supports it.
    scriptWriter.write("#!/bin/sh\n");
    scriptWriter.write("if [ -n \"$HADOOP_PREFIX\" ]; then\n");
    scriptWriter.write("  ${HADOOP_PREFIX}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
        + "/${PWD//\\//_}.hprof\n");
    scriptWriter.write("else\n");
    scriptWriter.write("  ${HADOOP_HOME}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
        + "/${PWD//\\//_}.hprof\n");
    scriptWriter.write("fi\n");
  } catch (IOException ioe) {
    // Log the cause too, so the failure can be diagnosed from the log alone.
    LOG.error("Heap dump script is not generated successfully.", ioe);
    try {
      if (fs.exists(dumpScript)) {
        fs.delete(dumpScript, true);
      }
    } catch (IOException cleanupFailure) {
      // A failing cleanup must not hide the original problem.
      ioe.addSuppressed(cleanupFailure);
    }
    throw ioe;
  }

  // NOTE(review): the chmod process is started but neither awaited nor checked for its exit
  // code, so a failed chmod goes unnoticed here.
  Runtime.getRuntime().exec(chmod + " " + dumpScript);
}
From source file:gobblin.util.io.StreamUtilsTest.java
License:Apache License
@Test public void testTarDir() throws IOException { FileSystem localFs = FileSystem.getLocal(new Configuration()); // Set of expected Paths to be in the resulting tar file Set<Path> expectedPaths = Sets.newHashSet(); // Create input directory Path testInDir = new Path("testDir"); expectedPaths.add(testInDir);/*from w w w . j a v a 2 s . c o m*/ // Create output file path Path testOutFile = new Path("testTarOut" + UUID.randomUUID() + ".tar.gz"); try { localFs.mkdirs(testInDir); // Create a test file path Path testFile1 = new Path(testInDir, "testFile1"); expectedPaths.add(testFile1); FSDataOutputStream testFileOut1 = localFs.create(testFile1); testFileOut1.close(); // Create a test file path Path testFile2 = new Path(testInDir, "testFile2"); expectedPaths.add(testFile2); FSDataOutputStream testFileOut2 = localFs.create(testFile2); testFileOut2.close(); // tar the input directory to the specific output file StreamUtils.tar(localFs, testInDir, testOutFile); // Confirm the contents of the tar file are valid try (TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream( new GzipCompressorInputStream(localFs.open(testOutFile)))) { TarArchiveEntry tarArchiveEntry; while (null != (tarArchiveEntry = tarArchiveInputStream.getNextTarEntry())) { assertThat(new Path(tarArchiveEntry.getName()), isIn(expectedPaths)); } } } finally { if (localFs.exists(testInDir)) { localFs.delete(testInDir, true); } if (localFs.exists(testOutFile)) { localFs.delete(testOutFile, true); } } }
From source file:gobblin.util.JobLauncherUtilsTest.java
License:Apache License
/**
 * Creates staging and output directories for two fork branches on the local file system, calls
 * {@code JobLauncherUtils.cleanTaskStagingData} and asserts that none of the four directories
 * exists afterwards.
 *
 * <p>Each branch has an explicit writer file path ({@code test0} / {@code test1}) configured.
 */
@Test
public void testDeleteStagingData() throws IOException {
  final int branches = 2;

  FileSystem fs = FileSystem.getLocal(new Configuration());

  Path rootDir = new Path("gobblin-test/job-launcher-utils-test");
  Path[] stagingDirs = { new Path(rootDir, "staging/fork_0"), new Path(rootDir, "staging/fork_1") };
  Path[] outputDirs = { new Path(rootDir, "output/fork_0"), new Path(rootDir, "output/fork_1") };
  String[] writerPaths = { "test0", "test1" };

  try {
    WorkUnitState state = new WorkUnitState();
    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");

    // Per-branch writer settings, one property at a time for both branches.
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, branches, branch),
          ConfigurationKeys.LOCAL_FS_URI);
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, branches, branch),
          stagingDirs[branch].toString());
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, branches, branch),
          outputDirs[branch].toString());
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_PATH, branches, branch),
          writerPaths[branch]);
    }

    // Create the directories that the cleanup is expected to remove.
    Path[] stagingPaths = new Path[branches];
    for (int branch = 0; branch < branches; branch++) {
      stagingPaths[branch] = new Path(stagingDirs[branch], writerPaths[branch]);
      fs.mkdirs(stagingPaths[branch]);
    }
    Path[] outputPaths = new Path[branches];
    for (int branch = 0; branch < branches; branch++) {
      outputPaths[branch] = new Path(outputDirs[branch], writerPaths[branch]);
      fs.mkdirs(outputPaths[branch]);
    }

    JobLauncherUtils.cleanTaskStagingData(state, LoggerFactory.getLogger(JobLauncherUtilsTest.class));

    for (Path stagingPath : stagingPaths) {
      Assert.assertFalse(fs.exists(stagingPath));
    }
    for (Path outputPath : outputPaths) {
      Assert.assertFalse(fs.exists(outputPath));
    }
  } finally {
    fs.delete(rootDir, true);
  }
}
From source file:gobblin.util.JobLauncherUtilsTest.java
License:Apache License
/**
 * Same scenario as the staging-data cleanup test, but without an explicit writer file path: the
 * per-branch sub-path is derived through {@code ForkOperatorUtils.getPathForBranch} from the
 * extract's output file path.
 *
 * <p>After {@code JobLauncherUtils.cleanTaskStagingData} runs, none of the four created
 * directories may exist.
 */
@Test
public void testDeleteStagingDataWithOutWriterFilePath() throws IOException {
  final int branches = 2;

  FileSystem fs = FileSystem.getLocal(new Configuration());

  String[] branchNames = { "fork_0", "fork_1" };
  String namespace = "gobblin.test";
  String tableName = "test-table";

  Path rootDir = new Path("gobblin-test/job-launcher-utils-test");
  Path[] stagingDirs = new Path[branches];
  for (int branch = 0; branch < branches; branch++) {
    stagingDirs[branch] = new Path(rootDir, "staging" + Path.SEPARATOR + branchNames[branch]);
  }
  Path[] outputDirs = new Path[branches];
  for (int branch = 0; branch < branches; branch++) {
    outputDirs[branch] = new Path(rootDir, "output" + Path.SEPARATOR + branchNames[branch]);
  }

  try {
    SourceState sourceState = new SourceState();
    WorkUnitState state = new WorkUnitState(
        WorkUnit.create(new Extract(sourceState, TableType.APPEND_ONLY, namespace, tableName)));

    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");

    // Per-branch settings, one property at a time for both branches.
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY, branches, branch),
          branchNames[branch]);
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, branches, branch),
          ConfigurationKeys.LOCAL_FS_URI);
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, branches, branch),
          stagingDirs[branch].toString());
    }
    for (int branch = 0; branch < branches; branch++) {
      state.setProp(
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, branches, branch),
          outputDirs[branch].toString());
    }

    // Create the directories that the cleanup is expected to remove.
    Path[] stagingPaths = new Path[branches];
    for (int branch = 0; branch < branches; branch++) {
      stagingPaths[branch] = new Path(stagingDirs[branch],
          ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), branches, branch));
      fs.mkdirs(stagingPaths[branch]);
    }
    Path[] outputPaths = new Path[branches];
    for (int branch = 0; branch < branches; branch++) {
      outputPaths[branch] = new Path(outputDirs[branch],
          ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), branches, branch));
      fs.mkdirs(outputPaths[branch]);
    }

    JobLauncherUtils.cleanTaskStagingData(state, LoggerFactory.getLogger(JobLauncherUtilsTest.class));

    for (Path stagingPath : stagingPaths) {
      Assert.assertFalse(fs.exists(stagingPath));
    }
    for (Path outputPath : outputPaths) {
      Assert.assertFalse(fs.exists(outputPath));
    }
  } finally {
    fs.delete(rootDir, true);
  }
}
From source file:gobblin.yarn.GobblinYarnLogSource.java
License:Apache License
private Path getHdfsLogDir(ContainerId containerId, FileSystem destFs, Path appWorkDir) throws IOException { Path logRootDir = new Path(appWorkDir, GobblinYarnConfigurationKeys.APP_LOGS_DIR_NAME); if (!destFs.exists(logRootDir)) { destFs.mkdirs(logRootDir); }//from w w w . ja va 2 s . c o m return new Path(logRootDir, containerId.toString()); }
From source file:hadoopdemo.Hadoop.java
private void addFolderButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_addFolderButtonActionPerformed try {/* w w w .j a v a2s. c om*/ String newFolder = newFolderTextField.getText(); System.out.println(newFolder); Configuration conf = new Configuration(); conf.addResource(new Path("/home/ado/hadoop-2.7.3/etc/hadoop/core-site.xml")); conf.addResource(new Path("/home/ado/hadoop-2.7.3/etc/hadoop/hdfs-site.xml")); conf.addResource(new Path("/home/ado/hadoop-2.7.3/etc/hadoop/mapred-site.xml")); FileSystem fileSystem = FileSystem.get(conf); fileSystem.mkdirs(new Path(newFolder)); } catch (IOException ex) { Logger.getLogger(Hadoop.class.getName()).log(Level.SEVERE, null, ex); } }