List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java
License:Apache License
/** * Implementation of CopyListing::buildListing(). * Creates the copy listing by "globbing" all source-paths. * @param pathToListingFile: The location at which the copy-listing file * is to be created. * @param options: Input Options for DistCp (indicating source/target paths.) * @throws IOException/* ww w. ja v a2 s. c o m*/ */ @Override public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException { List<Path> globbedPaths = new ArrayList<Path>(); if (options.getSourcePaths().isEmpty()) { throw new InvalidInputException("Nothing to process. Source paths::EMPTY"); } for (Path p : options.getSourcePaths()) { FileSystem fs = p.getFileSystem(getConf()); FileStatus[] inputs = fs.globStatus(p); if (inputs != null && inputs.length > 0) { for (FileStatus onePath : inputs) { globbedPaths.add(onePath.getPath()); } } else { throw new InvalidInputException(p + " doesn't exist"); } } DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath()); optionsGlobbed.setSyncFolder(options.shouldSyncFolder()); optionsGlobbed.setOverwrite(options.shouldOverwrite()); optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing()); optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath()); optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation()); optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing()); simpleListing.buildListing(pathToListingFile, optionsGlobbed); }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Deletes the temporary files under {@code targetWorkPath} whose names start
 * with {@code ".distcp.tmp."} followed by {@code jobId} with every
 * {@code "job"} replaced by {@code "attempt"}.
 *
 * @param targetWorkPath directory that is globbed for temporary files
 * @param targetFS       file system used for the glob and the deletes
 * @param jobId          job identifier from which the file-name prefix is derived
 * @throws IOException propagated from the glob or delete calls
 */
private void deleteAttemptTempFiles(Path targetWorkPath, FileSystem targetFS, String jobId)
        throws IOException {
    String attemptPrefix = ".distcp.tmp." + jobId.replaceAll("job", "attempt");
    Path tempFilePattern = new Path(targetWorkPath, attemptPrefix + "*");

    FileStatus[] leftovers = targetFS.globStatus(tempFilePattern);
    if (leftovers == null || leftovers.length == 0) {
        return;
    }

    for (FileStatus leftover : leftovers) {
        LOG.info("Cleaning up " + leftover.getPath());
        // Non-recursive delete; the boolean result is not inspected.
        targetFS.delete(leftover.getPath(), false);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Removes entries from the target that have no counterpart in the source
 * listing.
 *
 * <p>A listing of the target final path is built with
 * {@link GlobbedCopyListing}. Both the source and the target listing are
 * passed through {@code DistCpUtils.sortListing}, and the two resulting
 * sequence files are walked in step, comparing relative paths. Every target
 * entry whose relative path is not found in the source listing is deleted
 * recursively, unless it no longer exists.
 *
 * @param conf job configuration; supplies the listing-file path
 *             ({@code CONF_LABEL_LISTING_FILE_PATH}) and the target final
 *             path ({@code CONF_LABEL_TARGET_FINAL_PATH})
 * @throws IOException if a target entry cannot be deleted, or propagated from
 *                     the file-system and sequence-file calls
 */
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    // List what currently exists under the target, next to the source listing.
    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    // The target path "/NONE" is a placeholder; only the listing is built here.
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    // Both readers are opened inside the try so that a failure while opening
    // the second one still closes the first in the finally block.
    // NOTE(review): assumes IOUtils is org.apache.hadoop.io.IOUtils, whose
    // closeStream accepts null - confirm against the file's imports.
    SequenceFile.Reader sourceReader = null;
    SequenceFile.Reader targetReader = null;
    long deletedEntries = 0;
    try {
        sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
        targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            // Advance the source cursor until it is no longer behind the target entry.
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            // Present in both listings: keep it.
            if (srcAvailable && trgtRelPath.equals(srcRelPath)) {
                continue;
            }

            // An entry that is already gone counts as deleted.
            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/** * Implementation of the Mapper<>::map(). Does the copy. * @param relPath: The target path./* w w w .j a v a 2s. c o m*/ * @param sourceFileStatus: The source path. * @throws IOException */ @Override public void map(Text relPath, FileStatus sourceFileStatus, Context context) throws IOException, InterruptedException { Path sourcePath = sourceFileStatus.getPath(); Map<Long, Long> received = null; if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) { received = new HashMap<Long, Long>(); } if (LOG.isDebugEnabled()) LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath); Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString()); EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context); final String description = "Copying " + sourcePath + " to " + target; context.setStatus(description); LOG.info(description); try { FileStatus sourceCurrStatus; FileSystem sourceFS; try { sourceFS = sourcePath.getFileSystem(conf); sourceCurrStatus = sourceFS.getFileStatus(sourcePath); } catch (FileNotFoundException e) { throw new IOException(new RetriableFileCopyCommand.CopyReadException(e)); } FileStatus targetStatus = null; try { targetStatus = targetFS.getFileStatus(target); } catch (FileNotFoundException ignore) { } if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) { throw new IOException("Can't replace " + target + ". 
Target is " + getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus)); } if (sourceCurrStatus.isDir()) { createTargetDirsWithRetry(description, target, context); return; } if (skipFile(sourceFS, sourceCurrStatus, target)) { LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target); updateSkipCounters(context, sourceCurrStatus); } else { String streamName = null; if (!relPath.toString().isEmpty()) { Path relativePath = new Path(relPath.toString()); if (relativePath.depth() > 2) { // path is for mirror service and is of format // /conduit/streams/<streamName>/2013/09/12 Path tmpPath = relativePath; while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) { tmpPath = tmpPath.getParent(); } streamName = tmpPath.getName(); } else { // path is for merge service and of form /<stream name>/filename.gz streamName = relativePath.getParent().getName(); } } copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received); // generate audit counters if (received != null) { for (Entry<Long, Long> entry : received.entrySet()) { String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(), entry.getKey(), entry.getValue()); context.write(NullWritable.get(), new Text(counterNameValue)); } } } DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes); } catch (IOException exception) { handleFailures(exception, sourceFileStatus, target, context); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
private void copyFileWithRetry(String description, FileStatus sourceFileStatus, Path target, Context context, EnumSet<DistCpOptions.FileAttribute> fileAttributes, Map<Long, Long> received) throws IOException { long bytesCopied; try {// w w w .j a v a 2 s .c o m bytesCopied = (Long) new RetriableFileCopyCommand(description).execute(sourceFileStatus, target, context, fileAttributes, received); } catch (Exception e) { context.setStatus("Copy Failure: " + sourceFileStatus.getPath()); throw new IOException("File copy failed: " + sourceFileStatus.getPath() + " --> " + target, e); } incrementCounter(context, Counter.BYTES_EXPECTED, sourceFileStatus.getLen()); incrementCounter(context, Counter.BYTES_COPIED, bytesCopied); incrementCounter(context, Counter.PATHS_COPIED, 1); totalBytesCopied += bytesCopied; }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
private void handleFailures(IOException exception, FileStatus sourceFileStatus, Path target, Context context) throws IOException, InterruptedException { LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " + target, exception); if (ignoreFailures && exception.getCause() instanceof RetriableFileCopyCommand.CopyReadException) { incrementCounter(context, Counter.PATHS_FAILED, 1); incrementCounter(context, Counter.BYTES_FAILED, sourceFileStatus.getLen()); context.write(null, new Text( "FAIL: " + sourceFileStatus.getPath() + " - " + StringUtils.stringifyException(exception))); } else//from ww w . j a v a 2 s . co m throw exception; }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/**
 * Tells whether an existing target differs from the source in a way that
 * requires an update.
 *
 * <p>The target status is always fetched first. The result is {@code true}
 * only when {@code syncFolders} is set and at least one of the following
 * holds, checked in this order: the lengths differ; CRC checking is not
 * skipped and the checksums are not equal; the block sizes differ and block
 * size is among the preserved attributes.
 *
 * @param sourceFS file system of the source, used for the checksum comparison
 * @param source   status of the source file
 * @param target   path of the target file
 * @return {@code true} if the target must be updated
 * @throws IOException propagated from the status lookup or checksum
 *                     comparison
 */
private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target) throws IOException {
    final FileStatus targetFileStatus = targetFS.getFileStatus(target);

    if (!syncFolders) {
        return false;
    }
    if (targetFileStatus.getLen() != source.getLen()) {
        return true;
    }
    if (!skipCrc && !DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), targetFS, target)) {
        return true;
    }
    return source.getBlockSize() != targetFileStatus.getBlockSize()
            && preserve.contains(FileAttribute.BLOCKSIZE);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputChunkSet.java
License:Apache License
/**
 * Factory method that
 * 1. acquires a chunk for the specified map-task attempt
 * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
 *
 * <p>If a file named after the task ID already exists under
 * {@code chunkRootPath}, it is used as is. Otherwise each file returned by
 * {@code getListOfChunkFiles()} is tried in turn, and the first one that can
 * be renamed to that name is acquired.
 *
 * @param taskAttemptContext The attempt-context for the map task that's
 *                           trying to acquire a chunk.
 * @return The acquired dynamic-chunk, or {@code null} when no chunk-file
 *         could be renamed.
 * @throws IOException Exception on failure.
 * @throws InterruptedException Exception on failure.
 */
public DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    String taskId = HadoopCompat.getTaskAttemptID(taskAttemptContext).getTaskID().toString();
    Path acquiredFilePath = new Path(chunkRootPath, taskId);

    if (fs.exists(acquiredFilePath)) {
        LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
        return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
    }

    for (FileStatus candidate : getListOfChunkFiles()) {
        if (!fs.rename(candidate.getPath(), acquiredFilePath)) {
            LOG.warn(taskId + " could not acquire " + candidate.getPath());
            continue;
        }
        LOG.info(taskId + " acquired " + candidate.getPath());
        return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
    }

    return null;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
/**
 * Builds a copy listing, asks {@link DynamicInputFormat} for its splits and
 * reads every split through a record reader.
 *
 * <p>Checks that each record's source path is one of
 * {@code expectedFilePaths}, that reported progress stays within [0, 1] and
 * never decreases, that progress is 1 once a reader is exhausted, and that
 * the total number of records equals the number of expected paths.
 *
 * @throws Exception on any failure while building the listing or reading
 */
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);

    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);

        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        // Initialise the reader with the split it was created for, not always
        // the first split of the list.
        recordReader.initialize(split, taskAttemptContext);

        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));

            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
private long doCopy(FileStatus sourceFileStatus, Path target, Mapper.Context context, EnumSet<FileAttribute> fileAttributes, Map<Long, Long> received) throws IOException { Path tmpTargetPath = getTmpFile(target, context); final Configuration configuration = HadoopCompat.getTaskConfiguration(context); FileSystem targetFS = target.getFileSystem(configuration); compressionCodecs = new CompressionCodecFactory(context.getConfiguration()); try {// w w w . ja v a 2 s .c om if (LOG.isDebugEnabled()) { LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target); LOG.debug("Tmp-file path: " + tmpTargetPath); } FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(configuration); long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus, context, fileAttributes, received); compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead); if (bytesRead > 0) { compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath); } promoteTmpToTarget(tmpTargetPath, target, targetFS); return bytesRead; } finally { if (targetFS.exists(tmpTargetPath)) targetFS.delete(tmpTargetPath, false); } }