List of usage examples for org.apache.hadoop.fs.Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
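Before the excerpts below, here is a minimal, self-contained sketch of the call itself: given a Path, getFileSystem() resolves the owning FileSystem implementation (HDFS, local, s3a, and so on) from the path's URI scheme and the supplied Configuration. The namenode host and file name are hypothetical, chosen only for illustration.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical path; getFileSystem() picks the FileSystem
        // matching the "hdfs" scheme from the configuration.
        Path path = new Path("hdfs://namenode:8020/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);

        // fs.open() returns a stream backed by whichever file system
        // the path resolved to.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(fs.open(path)))) {
            System.out.println(reader.readLine());
        }
    }
}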
From source file: com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputFormat.java
License: Apache License
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle(JobContext context) throws IOException {
    final Configuration configuration = HadoopCompat.getConfiguration(context);
    int numRecords = getNumberOfRecords(configuration);
    int numMaps = getNumMapTasks(configuration);
    // Number of chunks each map will process, on average.
    int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
    validateNumChunksUsing(splitRatio, numMaps);

    int numEntriesPerChunk = (int) Math.ceil((float) numRecords / (splitRatio * numMaps));
    DistCpUtils.publish(HadoopCompat.getConfiguration(context), CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
            numEntriesPerChunk);

    final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk);
    int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

    Path listingPath = getListingFilePath(configuration);
    SequenceFile.Reader reader = new SequenceFile.Reader(listingPath.getFileSystem(configuration),
            listingPath, configuration);

    List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>();
    List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

    FileStatus fileStatus = new FileStatus();
    Text relPath = new Text();
    int recordCounter = 0;
    int chunkCount = 0;
    DynamicInputChunkSet chunkSet = new DynamicInputChunkSet(configuration);

    try {
        while (reader.next(relPath, fileStatus)) {
            if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) {
                // All chunks full. Create new chunk-set.
                closeAll(openChunks);
                chunksFinal.addAll(openChunks);

                openChunks = createChunks(chunkSet, chunkCount, nChunksTotal, nChunksOpenAtOnce);

                chunkCount += openChunks.size();
                nChunksOpenAtOnce = openChunks.size();
                recordCounter = 0;
            }

            // Shuffle into open chunks.
            openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus);
            ++recordCounter;
        }
    } finally {
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);
        IOUtils.closeStream(reader);
    }

    LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
    return chunksFinal;
}
From source file: com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputFormat.java
License: Apache License
private static Path getListingFilePath(Configuration configuration) {
    String listingFilePathString = configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");

    assert !listingFilePathString.equals("") : "Listing file not found.";

    Path listingFilePath = new Path(listingFilePathString);
    try {
        assert listingFilePath.getFileSystem(configuration).exists(listingFilePath)
                : "Listing file: " + listingFilePath + " not found.";
    } catch (IOException e) {
        assert false : "Listing file: " + listingFilePath + " couldn't be accessed. " + e.getMessage();
    }

    return listingFilePath;
}
From source file: com.inmobi.conduit.distcp.tools.mapred.RetriableDirectoryCreateCommand.java
License: Apache License
/**
 * Implementation of RetriableCommand::doExecute().
 * This implements the actual mkdirs() functionality.
 * @param arguments Argument-list to the command.
 * @return Boolean. True, if the directory could be created successfully.
 * @throws Exception IOException, on failure to create the directory.
 */
@Override
protected Object doExecute(Object... arguments) throws Exception {
    assert arguments.length == 2 : "Unexpected argument list.";
    Path target = (Path) arguments[0];
    Mapper.Context context = (Mapper.Context) arguments[1];

    FileSystem targetFS = target.getFileSystem(HadoopCompat.getTaskConfiguration(context));
    return targetFS.mkdirs(target);
}
From source file: com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License: Apache License
private long doCopy(FileStatus sourceFileStatus, Path target, Mapper.Context context,
        EnumSet<FileAttribute> fileAttributes, Map<Long, Long> received) throws IOException {

    Path tmpTargetPath = getTmpFile(target, context);
    final Configuration configuration = HadoopCompat.getTaskConfiguration(context);
    FileSystem targetFS = target.getFileSystem(configuration);
    compressionCodecs = new CompressionCodecFactory(context.getConfiguration());

    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
            LOG.debug("Tmp-file path: " + tmpTargetPath);
        }
        FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(configuration);
        long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus, context,
                fileAttributes, received);

        compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
        if (bytesRead > 0) {
            compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
        }
        promoteTmpToTarget(tmpTargetPath, target, targetFS);
        return bytesRead;

    } finally {
        if (targetFS.exists(tmpTargetPath))
            targetFS.delete(tmpTargetPath, false);
    }
}
From source file: com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License: Apache License
private void compareFileLengths(FileStatus sourceFileStatus, Path target, Configuration configuration,
        long bytesRead) throws IOException {
    final Path sourcePath = sourceFileStatus.getPath();
    FileSystem fs = sourcePath.getFileSystem(configuration);
    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
        throw new IOException("Mismatch in length of source:" + sourcePath + " and target:" + target);
}
From source file: com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License: Apache License
private static ThrottledInputStream getInputStream(Path path, Configuration conf) throws IOException {
    try {
        FileSystem fs = path.getFileSystem(conf);
        long bandwidthKB = getAllowedBandwidth(conf);
        return new ThrottledInputStream(new BufferedInputStream(fs.open(path)), bandwidthKB * 1024);
    } catch (IOException e) {
        throw new CopyReadException(e);
    }
}
From source file: com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License: Apache License
public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString()
            + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file: com.inmobi.conduit.distcp.tools.mapred.UniformSizeInputFormat.java
License: Apache License
private SequenceFile.Reader getListingFileReader(Configuration configuration) {
    final Path listingFilePath = getListingFilePath(configuration);
    try {
        final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
        if (!fileSystem.exists(listingFilePath))
            throw new IllegalArgumentException("Listing file doesn't exist at: " + listingFilePath);

        return new SequenceFile.Reader(fileSystem, listingFilePath, configuration);
    } catch (IOException exception) {
        LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
        throw new IllegalArgumentException("Couldn't find listing-file at: " + listingFilePath, exception);
    }
}
From source file: com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License: Apache License
@Override
protected void validatePaths(DistCpOptions options) throws IOException, InvalidInputException {

    if (options.isSkipPathValidation()) {
        LOG.debug("Skipping path validation in distcp");
        return;
    }

    Path targetPath = options.getTargetPath();
    FileSystem targetFS = targetPath.getFileSystem(getConf());
    boolean targetIsFile = targetFS.isFile(targetPath);

    // If the target is a file, then the source has to be a single file.
    if (targetIsFile) {
        if (options.getSourcePaths().size() > 1) {
            throw new InvalidInputException("Multiple sources being copied to a file: " + targetPath);
        }
        Path srcPath = options.getSourcePaths().get(0);
        FileSystem sourceFS = srcPath.getFileSystem(getConf());
        if (!sourceFS.isFile(srcPath)) {
            throw new InvalidInputException(
                    "Cannot copy " + srcPath + ", which is not a file, to " + targetPath);
        }
    }

    for (Path path : options.getSourcePaths()) {
        FileSystem fs = path.getFileSystem(getConf());
        if (!fs.exists(path)) {
            throw new InvalidInputException(path + " doesn't exist");
        }
    }

    /* This is required to allow map tasks to access each of the source
       clusters. It retrieves the delegation token for each unique file
       system and adds them to the job's private credential store. */
    Credentials credentials = getCredentials();
    if (credentials != null) {
        Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
        TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
    }
}
From source file: com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License: Apache License
/** {@inheritDoc} */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    SequenceFile.Writer fileListWriter = null;
    try {
        fileListWriter = getWriter(pathToListingFile);

        for (Path path : options.getSourcePaths()) {
            FileSystem sourceFS = path.getFileSystem(getConf());
            path = makeQualified(path);

            FileStatus rootStatus = sourceFS.getFileStatus(path);
            Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
            boolean localFile = (rootStatus.getClass() != FileStatus.class);

            FileStatus[] sourceFiles = sourceFS.listStatus(path);
            if (sourceFiles != null && sourceFiles.length > 0) {
                for (FileStatus sourceStatus : sourceFiles) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
                    }
                    writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile, options);

                    if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
                        }
                        traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile,
                                options);
                    }
                }
            } else {
                writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile, options);
            }
        }
    } finally {
        try {
            if (fileListWriter != null)
                fileListWriter.close();
        } catch (IOException exception) {
            LOG.error("Could not close output-stream to the file-list: ", exception);
            throw exception;
        }
    }
}