List of usage examples for the org.apache.hadoop.fs.Path method getParent()
public Path getParent()
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
/** * copies all files from the taskoutputpath to the outputpath * * @param conf/*w ww . j a v a2s . c o m*/ */ public static void commitTask(Configuration conf) throws IOException { Path taskOutputPath = new Path(conf.get("mapred.work.output.dir")); FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return; AtomicInteger integer = pathCounts.get(taskOutputPath.toString()); if (integer.decrementAndGet() != 0) return; String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id")); LOG.info("committing task: '{}' - {}", taskId, taskOutputPath); if (taskOutputPath != null) { if (writeDirectlyToWorkingPath(conf, taskOutputPath)) return; if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId, taskOutputPath); LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath); } } }
From source file:cn.spark.Case.MyMultipleOutputFormat.java
License:Apache License
/** * Generate the outfile name based on a given anme and the input file name. * If the map input file does not exists (i.e. this is not for a map only * job), the given name is returned unchanged. If the config value for * "num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given * name is returned unchanged. Otherwise, return a file name consisting of * the N trailing legs of the input file name where N is the config value * for "num.of.trailing.legs.to.use"./* www . j a v a 2 s .com*/ * * @param job * the job config * @param name * the output file name * @return the outfile name based on a given anme and the input file name. */ protected String getInputFileBasedOutputFileName(JobConf job, String name) { String infilepath = job.get("map.input.file"); if (infilepath == null) { // if the map input file does not exists, then return the given name return name; } int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0); if (numOfTrailingLegsToUse <= 0) { return name; } Path infile = new Path(infilepath); Path parent = infile.getParent(); String midName = infile.getName(); Path outPath = new Path(midName); for (int i = 1; i < numOfTrailingLegsToUse; i++) { if (parent == null) break; midName = parent.getName(); if (midName.length() == 0) break; parent = parent.getParent(); outPath = new Path(midName, outPath); } return outPath.toString(); }
From source file:com.alexholmes.hadooputils.sort.Sort.java
License:Apache License
/** * The driver for the sort MapReduce job. * * @param jobConf sort configuration * @param numMapTasks number of map tasks * @param numReduceTasks number of reduce tasks * @param sampler sampler, if required * @param codecClass the compression codec for compressing final outputs * @param mapCodecClass the compression codec for compressing intermediary map outputs * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes * for the job output files * @param inputDirAsString input directory in CSV-form * @param outputDirAsString output directory * @return true if the job completed successfully * @throws IOException if something went wrong * @throws URISyntaxException if a URI wasn't correctly formed */// w w w. j av a2s. com public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks, final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass, final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes, final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException { jobConf.setJarByClass(Sort.class); jobConf.setJobName("sorter"); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); if (numMapTasks != null) { jobConf.setNumMapTasks(numMapTasks); } if (numReduceTasks != null) { jobConf.setNumReduceTasks(numReduceTasks); } else { int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sortReduces = jobConf.get("test.sort.reduces_per_host"); if (sortReduces != null) { numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces); } // Set user-supplied (possibly default) job configs jobConf.setNumReduceTasks(numReduces); } jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(SortReduce.class); jobConf.setInputFormat(SortInputFormat.class); jobConf.setMapOutputKeyClass(Text.class); 
jobConf.setMapOutputValueClass(Text.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(Text.class); if (mapCodecClass != null) { jobConf.setMapOutputCompressorClass(mapCodecClass); } if (codecClass != null) { jobConf.setBoolean("mapred.output.compress", true); jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class); } FileInputFormat.setInputPaths(jobConf, inputDirAsString); FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString)); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); jobConf.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(jobConf)[0]; FileSystem fileSystem = FileSystem.get(jobConf); if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) { inputDir = inputDir.getParent(); } inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); InputSampler.writePartitionFile(jobConf, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, jobConf); DistributedCache.createSymlink(jobConf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + jobConf.getNumReduceTasks() + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); jobResult = JobClient.runJob(jobConf); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds."); if (jobResult.isSuccessful()) { if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) { new LzoIndexer(jobConf).index(new 
Path(outputDirAsString)); } return true; } return false; }
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException { Path stagingFile = null;/*from w w w.j a v a 2 s. co m*/ FileSystem destFs = null; String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter(); try { FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig()); // run a script which can change the name of the file as well as // write out a new version of the file // if (config.getWorkScript() != null) { Path newSrcFile = stageSource(srcFileStatus); srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile); } Path srcFile = srcFileStatus.getPath(); // get the target HDFS file // Path destFile = getHdfsTargetPath(srcFileStatus); if (config.getCodec() != null) { String ext = config.getCodec().getDefaultExtension(); if (!destFile.getName().endsWith(ext)) { destFile = new Path(destFile.toString() + ext); } } destFs = destFile.getFileSystem(config.getConfig()); // get the staging HDFS file // stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile); String batchId = srcFile.toString().substring( srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length()); log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'" + "$batchId#" + batchId); // if the directory of the target file doesn't exist, attempt to // create it // Path destParentDir = destFile.getParent(); if (!destFs.exists(destParentDir)) { log.info("event#Attempting creation of target directory: " + destParentDir.toUri()); if (!destFs.mkdirs(destParentDir)) { throw new IOException("event#Failed to create target directory: " + destParentDir.toUri()); } } // if the staging directory doesn't exist, attempt to create it // Path destStagingParentDir = stagingFile.getParent(); if (!destFs.exists(destStagingParentDir)) { log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri()); if 
(!destFs.mkdirs(destStagingParentDir)) { throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri()); } } // copy the file // InputStream is = null; OutputStream os = null; CRC32 crc = new CRC32(); try { is = new BufferedInputStream(srcFs.open(srcFile)); if (config.isVerify()) { is = new CheckedInputStream(is, crc); } os = destFs.create(stagingFile); if (config.getCodec() != null) { os = config.getCodec().createOutputStream(os); } IOUtils.copyBytes(is, os, 4096, false); } finally { IOUtils.closeStream(is); IOUtils.closeStream(os); } long srcFileSize = srcFs.getFileStatus(srcFile).getLen(); long destFileSize = destFs.getFileStatus(stagingFile).getLen(); if (config.getCodec() == null && srcFileSize != destFileSize) { throw new IOException( "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize); } log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#" + batchId); if (config.isVerify()) { verify(stagingFile, crc.getValue()); } if (destFs.exists(destFile)) { destFs.delete(destFile, false); } log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'" + "$batchId#" + batchId); if (!destFs.rename(stagingFile, destFile)) { throw new IOException("event#Failed to rename file"); } if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) { Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX); if (destFs.exists(lzoIndexPath)) { log.info("event#Deleting index file as it already exists"); destFs.delete(lzoIndexPath, false); } indexer.index(destFile); } fileSystemManager.fileCopyComplete(srcFileStatus); } catch (Throwable t) { log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t); // delete the staging file if it still exists // try { if (destFs != null && destFs.exists(stagingFile)) { destFs.delete(stagingFile, false); } } catch (Throwable t2) { log.error("event#Failed to 
delete staging file " + stagingFile, t2); } fileSystemManager.fileCopyError(srcFileStatus); } }
From source file:com.aliyun.fs.oss.blk.OssFileSystem.java
License:Apache License
/**
 * Creates the given directory together with its ancestors, walking from the
 * top-most ancestor down to the path itself. Paths rejected by
 * {@code checkValidity} are skipped.
 *
 * @param path       the directory to create
 * @param permission Currently ignored.
 * @return {@code true} only if every {@code mkdir} call made returned {@code true}
 * @throws IOException if a directory creation step fails
 */
@Override
public boolean mkdirs(Path path, FsPermission permission) throws IOException {
    // Collect the path and all of its ancestors, outermost first.
    List<Path> chain = new ArrayList<Path>();
    Path current = makeAbsolute(path);
    chain.add(0, current);
    while ((current = current.getParent()) != null) {
        chain.add(0, current);
    }

    boolean allCreated = true;
    for (Path candidate : chain) {
        if (!checkValidity(candidate)) {
            continue;
        }
        // keep going after a failure so every level is still attempted
        if (!mkdir(candidate)) {
            allCreated = false;
        }
    }
    return allCreated;
}
From source file:com.aliyun.fs.oss.blk.OssFileSystem.java
License:Apache License
/**
 * Opens an output stream for writing the given file.
 *
 * <p>An existing file is deleted first when {@code overwrite} is set and
 * otherwise causes an {@link IOException}. When the file does not exist yet,
 * its parent directory (if any) is created.
 *
 * @param file       the file to create
 * @param permission Currently ignored.
 * @param overwrite  whether an existing file may be replaced
 * @param bufferSize buffer size handed to the underlying output stream
 * @param blockSize  block size handed to the underlying output stream
 * @param progress   progress reporter handed to the underlying output stream
 * @return a stream writing to the file
 * @throws IOException if the file exists and {@code overwrite} is false, or
 *                     if the parent directory cannot be created
 */
@Override
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {

    this.blocksForOneTime.clear();

    INode existing = store.retrieveINode(makeAbsolute(file));

    if (existing == null) {
        // new file: make sure the directory it lives in exists
        Path parentDir = file.getParent();
        if (parentDir != null && !mkdirs(parentDir)) {
            throw new IOException("Mkdirs failed to create " + parentDir.toString());
        }
    } else if (!overwrite) {
        throw new IOException("File already exists: " + file);
    } else {
        delete(file);
    }

    OssOutputStream rawStream = new OssOutputStream(getConf(), store, makeAbsolute(file), blockSize, progress,
            bufferSize, blocksForOneTime);
    return new FSDataOutputStream(rawStream, statistics);
}
From source file:com.aliyun.fs.oss.blk.OssFileSystem.java
License:Apache License
@Override public boolean rename(Path src, Path dst) throws IOException { Path absoluteSrc = makeAbsolute(src); INode srcINode = store.retrieveINode(absoluteSrc); if (srcINode == null) { // src path doesn't exist return false; }/*from w w w. j a va2 s . c o m*/ Path absoluteDst = makeAbsolute(dst); INode dstINode = store.retrieveINode(absoluteDst); if (dstINode != null && dstINode.isDirectory()) { absoluteDst = new Path(absoluteDst, absoluteSrc.getName()); dstINode = store.retrieveINode(absoluteDst); } if (dstINode != null) { // dst path already exists - can't overwrite return false; } Path dstParent = absoluteDst.getParent(); if (dstParent != null) { INode dstParentINode = store.retrieveINode(dstParent); if (dstParentINode == null || dstParentINode.isFile()) { // dst parent doesn't exist or is a file return false; } } return renameRecursive(absoluteSrc, absoluteDst); }
From source file:com.aliyun.fs.oss.common.InMemoryFileSystemStore.java
License:Apache License
public Set<Path> listSubPaths(Path path) throws IOException { Path normalizedPath = normalize(path); // This is inefficient but more than adequate for testing purposes. Set<Path> subPaths = new LinkedHashSet<Path>(); for (Path p : inodes.tailMap(normalizedPath).keySet()) { if (normalizedPath.equals(p.getParent())) { subPaths.add(p);/* ww w .ja va 2 s. c o m*/ } } return subPaths; }
From source file:com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License:Apache License
@Override public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { Path parent = path.getParent(); if (parent != null) { // expect this to raise an exception if there is no parent if (!getFileStatus(parent).isDirectory()) { throw new FileAlreadyExistsException("Not a directory: " + parent); }// w ww.j a va 2s . c o m } return create(path, permission, flags.contains(CreateFlag.OVERWRITE), bufferSize, replication, blockSize, progress); }
From source file:com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License:Apache License
@Override public boolean mkdirs(Path f, FsPermission permission) throws IOException { Path absolutePath = makeAbsolute(f); List<Path> paths = new ArrayList<Path>(); do {//from w w w . j a v a2 s .com paths.add(0, absolutePath); absolutePath = absolutePath.getParent(); } while (absolutePath != null); boolean result = true; for (Path path : paths) { result &= mkdir(path); } return result; }