List of usage examples for org.apache.hadoop.fs.FileSystem#mkdirs(Path)
public boolean mkdirs(Path f) throws IOException
From source file:com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob.java
License:Apache License
@Override public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException { final Path actualOutputPath = FileOutputFormat.getOutputPath(this); final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis())); FileOutputFormat.setOutputPath(this, stagedPath); final Thread hook = new Thread(new Runnable() { @Override//from w ww . j a v a2s . c om public void run() { try { killJob(); } catch (IOException e) { e.printStackTrace(); } } }); Runtime.getRuntime().addShutdownHook(hook); final boolean retVal = super.waitForCompletion(verbose); Runtime.getRuntime().removeShutdownHook(hook); if (retVal) { FileSystem fs = actualOutputPath.getFileSystem(getConfiguration()); fs.mkdirs(actualOutputPath); log.info(String.format("Deleting data at old path[%s]", actualOutputPath)); fs.delete(actualOutputPath, true); log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath, actualOutputPath)); return fs.rename(stagedPath, actualOutputPath); } log.warn("retVal was false for some reason..."); return retVal; }
From source file:com.liveramp.cascading_ext.FileSystemHelper.java
License:Apache License
/**
 * Creates a directory, retrying up to {@code numTries} times and sleeping
 * {@code delayBetweenTries} milliseconds between consecutive attempts. No sleep is performed
 * after the final failed attempt.
 *
 * @param fs the filesystem object
 * @param dir the directory to be created
 * @param numTries number of tries to attempt the operation; if not positive, no attempt is made
 *        and the method fails immediately
 * @param delayBetweenTries the sleep delta between tries in millis
 * @throws IOException if {@code mkdirs} did not report success within {@code numTries} attempts,
 *         or if {@code mkdirs} itself throws
 * @throws RuntimeException wrapping the {@link InterruptedException} if the thread is interrupted
 *         while sleeping; the thread's interrupt flag is restored before throwing
 */
public static void safeMkdirs(FileSystem fs, Path dir, int numTries, long delayBetweenTries)
        throws IOException {
    for (int remaining = numTries; remaining > 0; remaining--) {
        if (fs.mkdirs(dir)) {
            return;
        }
        if (remaining == 1) {
            // Last attempt failed: sleeping again would only delay the error.
            break;
        }
        try {
            Thread.sleep(delayBetweenTries);
        } catch (InterruptedException ie) {
            // Restore the flag so callers further up the stack can still observe the interrupt.
            Thread.currentThread().interrupt();
            throw new RuntimeException(ie);
        }
    }
    throw new IOException("Could not mkdirs the directory \"" + dir + "\"!");
}
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger) throws IOException { if (!fs.exists(srcDir)) { return;//from www.j a va 2 s . co m } if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) { throw new IllegalArgumentException(srcDir + " is not a directory"); } if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) { throw new IllegalArgumentException(dstDir + " is not a directory"); } if (logger.isDebugEnabled()) { logger.debug("Moving contents of: " + srcDir + " to: " + dstDir); } FileStatus[] files = fs.listStatus(srcDir); for (FileStatus file : files) { Path sourcePath = file.getPath(); Path targetPath = new Path(dstDir, file.getPath().getName()); if (logger.isDebugEnabled()) { logger.debug("Moving: " + sourcePath + " to: " + targetPath); } if (!fs.mkdirs(targetPath.getParent())) { throw new IOException("Failed at creating directory " + targetPath.getParent()); } if (!fs.rename(sourcePath, targetPath)) { throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath); } } fs.delete(srcDir); }
From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
public static void commitJob(String domainName, JobConf conf) throws IOException { Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf)); Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf)); FileSystem fs = outputPath.getFileSystem(conf); // Create outputPath fs.mkdirs(outputPath); // Move temporary output to final output LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath); /* Current multithreading handles each partition separately. * Could use a higher level of granularity and have each file copying * performed as a separate Runnable. */// w w w . ja v a2 s .co m final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); Set<Integer> copiedPartitions = new HashSet<Integer>(); final List<MoveContentsAndDeleteTask> tasks = new ArrayList<MoveContentsAndDeleteTask>(); // Copy complete partitions copyPartitionsFrom(tmpOutputPath, fs, copiedPartitions, tasks, executor, outputPath); // Copy missing partitions from the empty partitions directory Path emptyPartitionsPath = new Path(tmpOutputPath, DomainBuilderAbstractOutputFormat.EMPTY_PARTITIONS_DIR); if (fs.exists(emptyPartitionsPath)) { copyPartitionsFrom(emptyPartitionsPath, fs, copiedPartitions, tasks, executor, outputPath); } executor.shutdown(); try { boolean allCopiersFinished = false; while (!allCopiersFinished) { allCopiersFinished = executor.awaitTermination(WAIT_CYCLE_SECONDS, TimeUnit.SECONDS); } } catch (InterruptedException e) { throw new IOException("Executor interrupted", e); } for (MoveContentsAndDeleteTask task : tasks) { if (task.exception != null) { throw new IOException("Partition copying failed for " + task.srcDir, task.exception); } } // Finally, cleanup cleanupJob(domainName, conf); }
From source file:com.moz.fiji.mapreduce.util.SerializeLoggerAspect.java
License:Apache License
/** * Logic to serialize collected profiling content to a file on HDFS. The files are stored * in the current working directory for this context, in a folder specified by STATS_DIR. The per * task file is named by the task attempt id. * We obtain the profiling stats collected by the LogTimerAspect in FijiSchema. The format of the * file is as follows: Job Name, Job ID, Task Attempt, Function Signature, * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n' * * @param context The {@link TaskInputOutputContext} for this job. * @throws IOException If the writes to HDFS fail. *///ww w.ja v a 2 s . c om private void serializeToFile(TaskInputOutputContext context) throws IOException { Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR); FileSystem fs = parentPath.getFileSystem(context.getConfiguration()); fs.mkdirs(parentPath); Path path = new Path(parentPath, context.getTaskAttemptID().toString()); OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8"); try { out.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), " + "Number of Invocations, Time per call (nanoseconds)\n"); ConcurrentHashMap<String, LoggingInfo> signatureTimeMap = mLogTimerAspect.getSignatureTimeMap(); for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) { writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue()); } signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap(); for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) { writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue()); } } finally { out.close(); } }
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
/** * Opens a new part file.// w ww . jav a2s . c o m * * <p> * This closes the old bucket file and retrieves a new bucket path from the {@code Bucketer}. */ private void openNewPartFile(Path bucketPath, BucketState<T> bucketState) throws Exception { closeCurrentPartFile(bucketState); FileSystem fs = new Path(basePath).getFileSystem(hadoopConf); if (!fs.exists(bucketPath)) { try { if (fs.mkdirs(bucketPath)) { LOG.debug("Created new bucket directory: {}", bucketPath); } } catch (IOException e) { throw new RuntimeException("Could not create new bucket path.", e); } } Path partPath = new Path(bucketPath, partPrefix + "-" + subtaskIndex + "-" + bucketState.partCounter + partSuffix); // This should work since there is only one parallel subtask that tries names with // our subtask id. Otherwise we would run into concurrency issues here. while (fs.exists(partPath) || fs .exists(new Path(partPath.getParent(), pendingPrefix + partPath.getName()).suffix(pendingSuffix))) { bucketState.partCounter++; partPath = new Path(bucketPath, partPrefix + "-" + subtaskIndex + "-" + bucketState.partCounter + partSuffix); } // increase, so we don't have to check for this name next time bucketState.partCounter++; LOG.debug("Next part path is {}", partPath.toString()); bucketState.currentFile = partPath.toString(); Path inProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName()) .suffix(inProgressSuffix); // If we don't already have a writer for this bucket, create one if (bucketState.writer == null) { bucketState.writer = writerTemplate.duplicate(); } bucketState.writer.open(fs, inProgressPath); bucketState.isWriterOpen = true; }
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
/** * @param hdfsFileSystem/*from w w w. j a v a 2 s . c o m*/ * @param topPath * @return * @throws IOException */ private static Path[] getInputPaths(FileSystem hdfsFileSystem, Path topPath, String lastMergedDirName, String lastcopiedDirName, Path passiveDbPathT) throws IOException { Path passiveDbPath = new Path(topPath, "db/mv1"); if (hdfsFileSystem.exists(passiveDbPath) == false) { hdfsFileSystem.mkdirs(passiveDbPath); } List<Path> pathList = new ArrayList<Path>(); buildInputPathList(hdfsFileSystem, topPath, pathList, lastMergedDirName, lastcopiedDirName); pathList.add(passiveDbPath); Path[] inputPaths = pathList.toArray(new Path[0]); return inputPaths; }
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test public void perms() throws IOException, InterruptedException { MiniDFSCluster minidfs = null;// ww w . j av a 2s . co m FileSystem fs = null; MiniMRClientCluster minimr = null; try { Configuration conf = new Configuration(true); conf.set("fs.permission.umask-mode", "0077"); minidfs = new MiniDFSCluster.Builder(conf).build(); minidfs.waitActive(); fs = minidfs.getFileSystem(); conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString()); Path p = path("/in"); fs.mkdirs(p); FSDataOutputStream os = fs.create(new Path(p, "input.txt")); os.write("hello!".getBytes("UTF-8")); os.close(); String user = UserGroupInformation.getCurrentUser().getUserName(); Path home = new Path("/User/" + user); fs.mkdirs(home); minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf); JobConf job = new JobConf(minimr.getConfig()); job.setJobName("PermsTest"); JobClient client = new JobClient(job); FileInputFormat.addInputPath(job, p); FileOutputFormat.setOutputPath(job, path("/out")); job.setInputFormat(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(MySleepMapper.class); job.setNumReduceTasks(1); RunningJob submittedJob = client.submitJob(job); // Sleep for a bit to let localization finish System.out.println("Sleeping..."); Thread.sleep(3 * 1000l); System.out.println("Done sleeping..."); assertFalse(UserGroupInformation.isSecurityEnabled()); Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/"); assertTrue(fs.exists(stagingRoot)); assertEquals(1, fs.listStatus(stagingRoot).length); Path staging = fs.listStatus(stagingRoot)[0].getPath(); Path jobXml = path(staging + "/job.xml"); assertTrue(fs.exists(jobXml)); FileStatus fileStatus = fs.getFileStatus(jobXml); System.out.println("job.xml permission = " + fileStatus.getPermission()); 
assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ)); assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ)); submittedJob.waitForCompletion(); } finally { if (minimr != null) { minimr.stop(); } if (fs != null) { fs.close(); } if (minidfs != null) { minidfs.shutdown(true); } } }
From source file:com.mycompany.mavenpails2.PailMove.java
public static void setApplicationConf() throws IOException { Map conf = new HashMap(); String sers = "backtype.hadoop.ThriftSerialization,org.apache.hadoop.io.serializer.WritableSerialization"; conf.put("io.serializations", sers); Api.setApplicationConf(conf);//w ww . j av a 2 s. c o m FileSystem fs = FileSystem.get(new Configuration()); fs.delete(new Path(TEMP_DIR), true); fs.mkdirs(new Path(TEMP_DIR)); /* Configuration conf2 = new Configuration(); FileSystem fs = FileSystem.get(conf2); fs.delete(new Path(TEMP_DIR), true); fs.mkdirs(new Path(TEMP_DIR)); */ }
From source file:com.nearinfinity.blur.utils.BlurUtil.java
License:Apache License
/**
 * Ensures that {@code path} exists, creating it (and any missing parents) when it does not.
 *
 * @param fileSystem file system on which the path is checked and created
 * @param path the path that must exist after this call
 * @return {@code true} if the path already existed, {@code false} if it was created by this call
 * @throws IOException if the file system throws, or if {@code mkdirs} reports that the path
 *         could not be created
 */
public static boolean createPath(FileSystem fileSystem, Path path) throws IOException {
    if (fileSystem.exists(path)) {
        return true;
    }
    LOG.info("Path [{0}] does not exist, creating.", path);
    // A false result means the directory was not created; do not report that as success.
    if (!fileSystem.mkdirs(path)) {
        throw new IOException("Failed to create path [" + path + "]");
    }
    return false;
}