List of usage examples for the no-argument constructor FileStatus() of org.apache.hadoop.fs.FileStatus
public FileStatus()
From source file:com.hdfs.concat.crush.BucketerTest.java
License:Apache License
/**
 * Calls {@code bucketer.add} with a wrapper around a default-constructed
 * {@link FileStatus}; per the test name this happens before any reset, and the
 * test passes only if an {@link IllegalStateException} is thrown.
 */
@Test(expected = IllegalStateException.class)
public void callAddBeforeReset() {
    FileStatus emptyStatus = new FileStatus();
    FileStatusHasSize sizedFile = new FileStatusHasSize(emptyStatus);
    bucketer.add(sizedFile);
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/** * Validate the final resulting path listing to see if there are any duplicate entries * * @param pathToListFile - path listing build by doBuildListing * @throws IOException - Any issues while checking for duplicates and throws * @throws DuplicateFileException - if there are duplicates *///from w w w.j ava2 s . co m protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException { Configuration config = getConf(); FileSystem fs = pathToListFile.getFileSystem(config); Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile); SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config); try { Text lastKey = new Text("*"); //source relative path can never hold * FileStatus lastFileStatus = new FileStatus(); Text currentKey = new Text(); while (reader.next(currentKey)) { if (currentKey.equals(lastKey)) { FileStatus currentFileStatus = new FileStatus(); reader.getCurrentValue(currentFileStatus); throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " + currentFileStatus.getPath() + " would cause duplicates. Aborting"); } reader.getCurrentValue(lastFileStatus); lastKey.set(currentKey); } } finally { IOUtils.closeStream(reader); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void preserveFileAttributes(Configuration conf) throws IOException { String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS); LOG.info("About to preserve attributes: " + attrSymbols); EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols); Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); FileSystem clusterFS = sourceListing.getFileSystem(conf); SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf); long totalLen = clusterFS.getFileStatus(sourceListing).getLen(); Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); long preservedEntries = 0; try {/* www . j a va2 s . com*/ FileStatus srcFileStatus = new FileStatus(); Text srcRelPath = new Text(); while (sourceReader.next(srcRelPath, srcFileStatus)) { if (!srcFileStatus.isDir()) continue; Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath); //Skip the root folder, preserve the status after atomic commit is complete //If it is changed any earlier, then atomic commit may fail if (targetRoot.equals(targetFile)) continue; FileSystem targetFS = targetFile.getFileSystem(conf); DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes); HadoopCompat.progress(taskAttemptContext); HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. [" + sourceReader.getPosition() * 100 / totalLen + "%]"); } } finally { IOUtils.closeStream(sourceReader); } LOG.info("Preserved status on " + preservedEntries + " dir entries on target"); }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void deleteMissing(Configuration conf) throws IOException { LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source"); Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); FileSystem clusterFS = sourceListing.getFileSystem(conf); Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing); Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq"); CopyListing target = new GlobbedCopyListing(conf, null); List<Path> targets = new ArrayList<Path>(1); Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); targets.add(targetFinalPath);//from ww w. j a v a 2 s . com DistCpOptions options = new DistCpOptions(targets, new Path("/NONE")); target.buildListing(targetListing, options); Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing); long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen(); SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf); SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf); long deletedEntries = 0; try { FileStatus srcFileStatus = new FileStatus(); Text srcRelPath = new Text(); FileStatus trgtFileStatus = new FileStatus(); Text trgtRelPath = new Text(); FileSystem targetFS = targetFinalPath.getFileSystem(conf); boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus); while (targetReader.next(trgtRelPath, trgtFileStatus)) { while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) { srcAvailable = sourceReader.next(srcRelPath, srcFileStatus); } if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue; boolean result = (!targetFS.exists(trgtFileStatus.getPath()) || targetFS.delete(trgtFileStatus.getPath(), true)); if (result) { LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source"); deletedEntries++; } else { throw new 
IOException("Unable to delete " + trgtFileStatus.getPath()); } HadoopCompat.progress(taskAttemptContext); HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. [" + targetReader.getPosition() * 100 / totalLen + "%]"); } } finally { IOUtils.closeStream(sourceReader); IOUtils.closeStream(targetReader); } LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0)); }
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputFormat.java
License:Apache License
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle(JobContext context) throws IOException { final Configuration configuration = HadoopCompat.getConfiguration(context); int numRecords = getNumberOfRecords(configuration); int numMaps = getNumMapTasks(configuration); // Number of chunks each map will process, on average. int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords); validateNumChunksUsing(splitRatio, numMaps); int numEntriesPerChunk = (int) Math.ceil((float) numRecords / (splitRatio * numMaps)); DistCpUtils.publish(HadoopCompat.getConfiguration(context), CONF_LABEL_NUM_ENTRIES_PER_CHUNK, numEntriesPerChunk);/*from w w w.jav a 2 s. c om*/ final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk); int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal); Path listingPath = getListingFilePath(configuration); SequenceFile.Reader reader = new SequenceFile.Reader(listingPath.getFileSystem(configuration), listingPath, configuration); List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>(); List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>(); FileStatus fileStatus = new FileStatus(); Text relPath = new Text(); int recordCounter = 0; int chunkCount = 0; DynamicInputChunkSet chunkSet = new DynamicInputChunkSet(configuration); try { while (reader.next(relPath, fileStatus)) { if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) { // All chunks full. Create new chunk-set. closeAll(openChunks); chunksFinal.addAll(openChunks); openChunks = createChunks(chunkSet, chunkCount, nChunksTotal, nChunksOpenAtOnce); chunkCount += openChunks.size(); nChunksOpenAtOnce = openChunks.size(); recordCounter = 0; } // Shuffle into open chunks. 
openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus); ++recordCounter; } } finally { closeAll(openChunks); chunksFinal.addAll(openChunks); IOUtils.closeStream(reader); } LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); return chunksFinal; }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License:Apache License
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException { long lastEnd = 0; //Verify if each split's start is matching with the previous end and //we are not missing anything for (InputSplit split : splits) { FileSplit fileSplit = (FileSplit) split; long start = fileSplit.getStart(); Assert.assertEquals(lastEnd, start); lastEnd = start + fileSplit.getLength(); }/*from w w w. j ava2s. c o m*/ //Verify there is nothing more to read from the input file SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(), listFile, cluster.getFileSystem().getConf()); try { reader.seek(lastEnd); FileStatus srcFileStatus = new FileStatus(); Text srcRelPath = new Text(); Assert.assertFalse(reader.next(srcRelPath, srcFileStatus)); } finally { IOUtils.closeStream(reader); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.UniformSizeInputFormat.java
License:Apache License
private List<InputSplit> getSplits(Configuration configuration, int numSplits, long totalSizeBytes) throws IOException { List<InputSplit> splits = new ArrayList<InputSplit>(numSplits); long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits); FileStatus srcFileStatus = new FileStatus(); Text srcRelPath = new Text(); long currentSplitSize = 0; long lastSplitStart = 0; long lastPosition = 0; final Path listingFilePath = getListingFilePath(configuration); if (LOG.isDebugEnabled()) { LOG.debug("Average bytes per map: " + nBytesPerSplit + ", Number of maps: " + numSplits + ", total size: " + totalSizeBytes); }//from ww w . j a v a 2s . c o m SequenceFile.Reader reader = null; try { reader = getListingFileReader(configuration); while (reader.next(srcRelPath, srcFileStatus)) { // If adding the current file would cause the bytes per map to exceed // limit. Add the current file to new split if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) { FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null); if (LOG.isDebugEnabled()) { LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize); } splits.add(split); lastSplitStart = lastPosition; currentSplitSize = 0; } currentSplitSize += srcFileStatus.getLen(); lastPosition = reader.getPosition(); } if (lastPosition > lastSplitStart) { FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null); if (LOG.isDebugEnabled()) { LOG.info("Creating split : " + split + ", bytes in split: " + currentSplitSize); } splits.add(split); } } finally { IOUtils.closeStream(reader); } return splits; }
From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License:Apache License
private FileStatus getFileStatus(FileStatus fileStatus) throws IOException { FileStatus status = new FileStatus(); buffer.reset();//from w ww. j a va 2 s. c o m DataOutputStream out = new DataOutputStream(buffer); fileStatus.write(out); in.reset(buffer.toByteArray(), 0, buffer.size()); status.readFields(in); return status; }
From source file:com.inmobi.conduit.distcp.tools.TestCopyListing.java
License:Apache License
@Test public void testBuildListingForSingleFile() { FileSystem fs = null;//from w w w . jav a 2 s . c om String testRootString = "/singleFileListing"; Path testRoot = new Path(testRootString); SequenceFile.Reader reader = null; try { fs = FileSystem.get(getConf()); if (fs.exists(testRoot)) TestDistCpUtils.delete(fs, testRootString); Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt"); Path decoyFile = new Path(testRoot, "/target/moo/source.txt"); Path targetFile = new Path(testRoot, "/target/moo/target.txt"); TestDistCpUtils.createFile(fs, sourceFile.toString()); TestDistCpUtils.createFile(fs, decoyFile.toString()); TestDistCpUtils.createFile(fs, targetFile.toString()); List<Path> srcPaths = new ArrayList<Path>(); srcPaths.add(sourceFile); DistCpOptions options = new DistCpOptions(srcPaths, targetFile); CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS); final Path listFile = new Path(testRoot, "/tmp/fileList.seq"); listing.buildListing(listFile, options); reader = new SequenceFile.Reader(fs, listFile, getConf()); FileStatus fileStatus = new FileStatus(); Text relativePath = new Text(); Assert.assertTrue(reader.next(relativePath, fileStatus)); Assert.assertTrue(relativePath.toString().equals("")); } catch (Exception e) { Assert.fail("Unexpected exception encountered."); LOG.error("Unexpected exception: ", e); } finally { TestDistCpUtils.delete(fs, testRootString); IOUtils.closeStream(reader); } }
From source file:com.inmobi.conduit.distcp.tools.TestFileBasedCopyListing.java
License:Apache License
/**
 * Verifies the contents of a listing file against the expected entries in
 * {@code map} and checks the number of records read.
 *
 * <p>For every record, the path component of the stored file status must equal
 * the value {@code map} holds for the record's relative path. When
 * {@code count} is 0 the listing file is not opened at all.
 *
 * @param listFile listing file to read
 * @param count    expected number of records; 0 skips verification
 * @throws IOException if the listing file cannot be opened or read
 */
private void checkResult(Path listFile, int count) throws IOException {
    if (count == 0) {
        return;
    }

    int recCount = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, listFile, config);
    try {
        Text relPath = new Text();
        FileStatus fileStatus = new FileStatus();
        while (reader.next(relPath, fileStatus)) {
            // Expected value first, so a failure message labels the two sides correctly.
            Assert.assertEquals(map.get(relPath.toString()), fileStatus.getPath().toUri().getPath());
            recCount++;
        }
    } finally {
        IOUtils.closeStream(reader);
    }
    Assert.assertEquals(count, recCount);
}