Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java

License:Apache License

/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source-paths.
 * @param pathToListingFile: The location at which the copy-listing file
 *                           is to be created.
 * @param options: Input Options for DistCp (indicating source/target paths.)
 * @throws IOException/*  ww  w.  ja  v a2 s. c o  m*/
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    List<Path> globbedPaths = new ArrayList<Path>();
    if (options.getSourcePaths().isEmpty()) {
        throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
    }

    for (Path p : options.getSourcePaths()) {
        FileSystem fs = p.getFileSystem(getConf());
        FileStatus[] inputs = fs.globStatus(p);

        if (inputs != null && inputs.length > 0) {
            for (FileStatus onePath : inputs) {
                globbedPaths.add(onePath.getPath());
            }
        } else {
            throw new InvalidInputException(p + " doesn't exist");
        }
    }

    DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath());
    optionsGlobbed.setSyncFolder(options.shouldSyncFolder());
    optionsGlobbed.setOverwrite(options.shouldOverwrite());
    optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing());
    optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath());
    optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation());
    optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing());

    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

/**
 * Deletes the temporary files under {@code targetWorkPath} whose names start
 * with ".distcp.tmp." followed by the job id with "job" replaced by "attempt".
 *
 * @param targetWorkPath directory that is globbed for temporary files
 * @param targetFS file system used for the glob and the deletes
 * @param jobId job identifier used to derive the attempt prefix
 * @throws IOException if the glob or a delete fails
 */
private void deleteAttemptTempFiles(Path targetWorkPath, FileSystem targetFS, String jobId) throws IOException {

    final String attemptPattern = ".distcp.tmp." + jobId.replaceAll("job", "attempt") + "*";
    final FileStatus[] leftovers = targetFS.globStatus(new Path(targetWorkPath, attemptPattern));

    if (leftovers == null) {
        return;
    }
    for (FileStatus leftover : leftovers) {
        LOG.info("Cleaning up " + leftover.getPath());
        // Non-recursive delete.
        targetFS.delete(leftover.getPath(), false);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

/**
 * Removes from the target every entry that has no counterpart in the source
 * listing. A listing of the target is built and sorted, then walked in
 * lock-step with the sorted source listing; target entries whose relative
 * path is absent from the source are deleted recursively.
 *
 * @param conf configuration supplying the listing-file path and the final
 *             target path
 * @throws IOException if a listing cannot be built or read, or if a target
 *                     entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    // The listing is built over the target itself, so the options' own
    // target path is only a placeholder.
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    // Readers are opened inside the try so that a failure while opening the
    // second one still closes the first.
    SequenceFile.Reader sourceReader = null;
    SequenceFile.Reader targetReader = null;

    long deletedEntries = 0;
    try {
        sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
        targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            // Advance the source cursor until it is at or past the target entry.
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            // Present in both listings: nothing to delete.
            if (srcAvailable && trgtRelPath.equals(srcRelPath)) {
                continue;
            }

            // An entry that no longer exists counts as deleted.
            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        // NOTE(review): relies on IOUtils.closeStream tolerating null, as
        // Hadoop's org.apache.hadoop.io.IOUtils does - confirm which IOUtils
        // this file imports.
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Implementation of the Mapper<>::map(). Does the copy.
 * @param relPath: The target path./* w  w w .j a  v  a 2s.  c o m*/
 * @param sourceFileStatus: The source path.
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);

    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;

        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus)
                    + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }

        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);

    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Copies one file through a RetriableFileCopyCommand and updates the
 * byte/path counters once the command has returned.
 *
 * @param description label passed to the retriable command
 * @param sourceFileStatus status of the file to copy
 * @param target destination path
 * @param context mapper context used for status and counters
 * @param fileAttributes attributes handed through to the copy command
 * @param received map handed through to the copy command
 * @throws IOException wrapping any exception raised by the copy command
 */
private void copyFileWithRetry(String description, FileStatus sourceFileStatus, Path target, Context context,
        EnumSet<DistCpOptions.FileAttribute> fileAttributes, Map<Long, Long> received) throws IOException {

    long copiedByteCount;
    try {
        final Object commandResult = new RetriableFileCopyCommand(description).execute(sourceFileStatus,
                target, context, fileAttributes, received);
        copiedByteCount = (Long) commandResult;
    } catch (Exception e) {
        context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
        throw new IOException("File copy failed: " + sourceFileStatus.getPath() + " --> " + target, e);
    }

    // Counters are only touched after a successful copy.
    incrementCounter(context, Counter.BYTES_EXPECTED, sourceFileStatus.getLen());
    incrementCounter(context, Counter.BYTES_COPIED, copiedByteCount);
    incrementCounter(context, Counter.PATHS_COPIED, 1);
    totalBytesCopied += copiedByteCount;
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Logs a copy failure and either records it or rethrows it. The failure is
 * recorded (counters plus a "FAIL:" record written to the context) only when
 * failures are being ignored and the exception's cause is a CopyReadException;
 * otherwise the exception is rethrown unchanged.
 *
 * @param exception the failure raised while copying
 * @param sourceFileStatus status of the source that failed to copy
 * @param target intended destination path
 * @param context mapper context used for counters and output
 * @throws IOException the original exception when it is not tolerated
 * @throws InterruptedException if writing to the context is interrupted
 */
private void handleFailures(IOException exception, FileStatus sourceFileStatus, Path target, Context context)
        throws IOException, InterruptedException {
    LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " + target, exception);

    final boolean tolerated = ignoreFailures
            && exception.getCause() instanceof RetriableFileCopyCommand.CopyReadException;
    if (!tolerated) {
        throw exception;
    }

    incrementCounter(context, Counter.PATHS_FAILED, 1);
    incrementCounter(context, Counter.BYTES_FAILED, sourceFileStatus.getLen());
    final String failureRecord = "FAIL: " + sourceFileStatus.getPath() + " - "
            + StringUtils.stringifyException(exception);
    context.write(null, new Text(failureRecord));
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Decides whether an existing target file has to be re-copied. This is only
 * ever true when folder syncing is on, and then when the lengths differ, when
 * CRC checking is enabled and the checksums differ, or when the block sizes
 * differ and block size is among the preserved attributes.
 *
 * @param sourceFS file system of the source, used for the checksum comparison
 * @param source status of the source file
 * @param target path of the existing target file
 * @return true if the target must be overwritten with the source
 * @throws IOException if the target status or a checksum cannot be read
 */
private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target) throws IOException {
    // Looked up first, before any flag is consulted, as in the original expression.
    final FileStatus existing = targetFS.getFileStatus(target);

    if (!syncFolders) {
        return false;
    }
    if (existing.getLen() != source.getLen()) {
        return true;
    }
    if (!skipCrc && !DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), targetFS, target)) {
        return true;
    }
    return source.getBlockSize() != existing.getBlockSize()
            && preserve.contains(FileAttribute.BLOCKSIZE);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputChunkSet.java

License:Apache License

/**
 * Factory method that
 * 1. acquires a chunk for the specified map-task attempt
 * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
 * A chunk-file already named after the task is reused; otherwise the listed
 * chunk-files are tried in turn until one can be renamed.
 * @param taskAttemptContext The attempt-context for the map task that's
 * trying to acquire a chunk.
 * @return The acquired dynamic-chunk, or null when no chunk-file could be
 * acquired. The chunk-file is renamed to the task-id taken from the
 * attempt-context.
 * @throws IOException Exception on failure.
 * @throws InterruptedException Exception on failure.
 */
public DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    final String taskId = HadoopCompat.getTaskAttemptID(taskAttemptContext).getTaskID().toString();
    final Path ownedChunkPath = new Path(chunkRootPath, taskId);

    if (fs.exists(ownedChunkPath)) {
        LOG.info("Acquiring pre-assigned chunk: " + ownedChunkPath);
        return new DynamicInputChunk(ownedChunkPath, taskAttemptContext);
    }

    for (FileStatus candidate : getListOfChunkFiles()) {
        // A failed rename means this candidate could not be claimed.
        if (!fs.rename(candidate.getPath(), ownedChunkPath)) {
            LOG.warn(taskId + " could not acquire " + candidate.getPath());
            continue;
        }
        LOG.info(taskId + " acquired " + candidate.getPath());
        return new DynamicInputChunk(ownedChunkPath, taskAttemptContext);
    }

    return null;
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

/**
 * Builds a copy listing, asks DynamicInputFormat for its splits, then reads
 * every split through a record reader. Checks that each file read is an
 * expected one, that progress never decreases and stays within [0, 1], and
 * that the total number of files read matches the expectation.
 */
@Test
public void testGetSplits() throws Exception {
    final DistCpOptions options = getOptions();
    final Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));

    final CopyListing copyListing = CopyListing.getCopyListing(configuration, CREDENTIALS, options);
    final Path listingFile = new Path(
            cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq");
    copyListing.buildListing(listingFile, options);

    final JobID jobId = new JobID();
    final JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    final DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    final List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int filesSeen = 0;

    for (int taskId = 0; taskId < splits.size(); taskId++) {
        final InputSplit split = splits.get(taskId);

        final TaskAttemptID attemptId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(attemptId);

        final RecordReader<Text, FileStatus> recordReader =
                inputFormat.createRecordReader(split, taskAttemptContext);
        // NOTE(review): the reader is always initialized with the first split
        // rather than the one being iterated - confirm this is intentional.
        recordReader.initialize(splits.get(0), taskAttemptContext);

        float lastProgress = 0f;
        while (recordReader.nextKeyValue()) {
            final String source = recordReader.getCurrentValue().getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));

            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= lastProgress);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            lastProgress = progress;
            ++filesSeen;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
    }

    Assert.assertEquals(expectedFilePaths.size(), filesSeen);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java

License:Apache License

/**
 * Copies the source into a temporary file, verifies the length (and, for
 * non-empty copies, the checksum) and then promotes the temporary file to
 * the target. The temporary file is removed afterwards if it still exists.
 *
 * @param sourceFileStatus status of the file to copy
 * @param target final destination path
 * @param context mapper context, used for the configuration and temp-file name
 * @param fileAttributes attributes handed through to the temp-file copy
 * @param received map handed through to the temp-file copy
 * @return the number of bytes read from the source
 * @throws IOException if the copy, a verification step or the cleanup fails
 */
private long doCopy(FileStatus sourceFileStatus, Path target, Mapper.Context context,
        EnumSet<FileAttribute> fileAttributes, Map<Long, Long> received) throws IOException {

    final Path tmpTargetPath = getTmpFile(target, context);
    final Configuration configuration = HadoopCompat.getTaskConfiguration(context);
    final FileSystem targetFS = target.getFileSystem(configuration);
    compressionCodecs = new CompressionCodecFactory(context.getConfiguration());
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
            LOG.debug("Tmp-file path: " + tmpTargetPath);
        }
        final FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(configuration);
        final long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus, context,
                fileAttributes, received);

        compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
        // The checksum comparison is skipped for empty copies.
        if (bytesRead > 0) {
            compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
        }
        promoteTmpToTarget(tmpTargetPath, target, targetFS);
        return bytesRead;

    } finally {
        // Clean up the temporary file on both success and failure paths.
        if (targetFS.exists(tmpTargetPath)) {
            targetFS.delete(tmpTargetPath, false);
        }
    }
}