Example usage for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path.getFileSystem(Configuration).

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
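
A minimal sketch of the call itself (not taken from the source files below; the hdfs:// URI and file name are placeholders): given a Path and a Configuration, getFileSystem resolves the FileSystem instance that owns the path, which can then be used for ordinary file operations.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration(); // picks up core-site.xml / hdfs-site.xml if present
        Path path = new Path("hdfs://namenode:8020/user/example/data.txt"); // placeholder path

        // Resolve the FileSystem that owns this path (HDFS here; LocalFileSystem for file:// paths).
        FileSystem fs = path.getFileSystem(conf);

        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " has length " + status.getLen());
        } else {
            System.out.println(path + " does not exist on " + fs.getUri());
        }
    }
}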

Usage

From source file: com.inmobi.conduit.distcp.tools.DistCp.java

License: Apache License

/**
 * Setup output format appropriately
 *
 * @param job - Job handle
 * @throws IOException - Exception if any
 */
private void configureOutputFormat(Job job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    Path targetPath = inputOptions.getTargetPath();
    targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));

    if (inputOptions.shouldAtomicCommit()) {
        Path workDir = inputOptions.getAtomicWorkPath();
        if (workDir == null) {
            workDir = targetPath.getParent();
        }
        workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
        FileSystem workFS = workDir.getFileSystem(configuration);
        FileSystem targetFS = targetPath.getFileSystem(configuration);
        if (!DistCpUtils.compareFs(targetFS, workFS)) {
            throw new IllegalArgumentException("Work path " + workDir + " and target path " + targetPath
                    + " are in different file system");
        }
        CopyOutputFormat.setWorkingDirectory(job, workDir);
    } else {
        CopyOutputFormat.setWorkingDirectory(job, targetPath);
    }
    CopyOutputFormat.setCommitDirectory(job, targetPath);

    Path counterFilePath = inputOptions.getOutPutDirectory();
    if (counterFilePath == null) {
        LOG.error("Output directory is null for distcp");
    } else {
        LOG.info("DistCp output directory path: " + counterFilePath);
        CopyOutputFormat.setOutputPath(job, counterFilePath);
    }

}
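
The atomic-commit branch above relies on DistCpUtils.compareFs to verify that the work directory and the target directory live on the same file system. Below is a minimal, self-contained sketch of that kind of check, assuming a comparison of the two FileSystem URIs (scheme and authority) is sufficient; this is not the exact DistCpUtils implementation.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SameFileSystemCheck {

    /** True if both paths resolve to a FileSystem with the same scheme and authority. */
    static boolean onSameFileSystem(Path a, Path b, Configuration conf) throws IOException {
        URI uriA = a.getFileSystem(conf).getUri();
        URI uriB = b.getFileSystem(conf).getUri();
        return uriA.getScheme().equalsIgnoreCase(uriB.getScheme())
                && String.valueOf(uriA.getAuthority()).equalsIgnoreCase(String.valueOf(uriB.getAuthority()));
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path workDir = new Path("hdfs://namenode:8020/tmp/distcp-work"); // hypothetical paths
        Path targetDir = new Path("hdfs://namenode:8020/data/target");
        System.out.println("Same file system: " + onSameFileSystem(workDir, targetDir, conf));
    }
}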

From source file: com.inmobi.conduit.distcp.tools.FileBasedCopyListing.java

License: Apache License

protected static List<Path> fetchFileList(Path sourceListing, Configuration conf) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = sourceListing.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        IOUtils.closeStream(input);
    }
    return result;
}

From source file: com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java

License: Apache License

/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source-paths.
 * @param pathToListingFile: The location at which the copy-listing file
 *                           is to be created.
 * @param options: Input Options for DistCp (indicating source/target paths.)
 * @throws IOException
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    List<Path> globbedPaths = new ArrayList<Path>();
    if (options.getSourcePaths().isEmpty()) {
        throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
    }

    for (Path p : options.getSourcePaths()) {
        FileSystem fs = p.getFileSystem(getConf());
        FileStatus[] inputs = fs.globStatus(p);

        if (inputs != null && inputs.length > 0) {
            for (FileStatus onePath : inputs) {
                globbedPaths.add(onePath.getPath());
            }
        } else {
            throw new InvalidInputException(p + " doesn't exist");
        }
    }

    DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath());
    optionsGlobbed.setSyncFolder(options.shouldSyncFolder());
    optionsGlobbed.setOverwrite(options.shouldOverwrite());
    optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing());
    optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath());
    optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation());
    optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing());

    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License: Apache License

private void cleanupTempFiles(JobContext context) {
    try {
        Configuration conf = HadoopCompat.getConfiguration(context);

        Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
        FileSystem targetFS = targetWorkPath.getFileSystem(conf);

        String jobId = HadoopCompat.getJobId(context).toString();
        deleteAttemptTempFiles(targetWorkPath, targetFS, jobId);
        deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId);
    } catch (Throwable t) {
        LOG.warn("Unable to cleanup temp files", t);
    }
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License: Apache License

/**
 * Cleanup meta folder and other temporary files
 *
 * @param conf - Job Configuration
 */
private void cleanup(Configuration conf) {
    Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER));
    try {
        FileSystem fs = metaFolder.getFileSystem(conf);
        LOG.info("Cleaning up temporary work folder: " + metaFolder);
        fs.delete(metaFolder, true);
    } catch (IOException ignore) {
        LOG.error("Exception encountered ", ignore);
    }
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License: Apache License

private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;

            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            //Skip the root folder, preserve the status after atomic commit is complete
            //If it is changed any earlier, then atomic commit may fail
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++; // count preserved entries so the summary log below is accurate

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License: Apache License

private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License: Apache License

private void commitData(Configuration conf) throws IOException {

    Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
    Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    FileSystem targetFS = workDir.getFileSystem(conf);

    LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);
    if (targetFS.exists(finalDir) && targetFS.exists(workDir))
        if (!targetFS.delete(finalDir, true)) {
            LOG.error("Unable to delete pre-existing final-data at " + finalDir);
            throw new IOException("Atomic commit failed. Pre-existing final data" + " in " + finalDir
                    + " could not be cleared, before commit.");
        }

    boolean result = targetFS.rename(workDir, finalDir);
    if (!result) {
        LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
        result = targetFS.exists(finalDir) && !targetFS.exists(workDir);
    }
    if (result) {
        LOG.info("Data committed successfully to " + finalDir);
        HadoopCompat.setStatus(taskAttemptContext, "Data committed successfully to " + finalDir);
    } else {
        LOG.error("Unable to commit data to " + finalDir);
        throw new IOException(
                "Atomic commit failed. Temporary data in " + workDir + ", Unable to move to " + finalDir);
    }
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License: Apache License

@Override
public void setup(Context context) throws IOException, InterruptedException {
    conf = context.getConfiguration();

    syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
    ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
    skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
    overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
    preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()));

    targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targetFS = targetFinalPath.getFileSystem(conf);

    if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) {
        overWrite = true; // When target is an existing file, overwrite it.
    }

    if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
        initializeSSLConf();
    }
    startEpoch = System.currentTimeMillis();
}

From source file: com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License: Apache License

/**
 * Implementation of the Mapper<>::map(). Does the copy.
 * @param relPath: The target path.
 * @param sourceFileStatus: The source path.
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);

    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;

        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus)
                    + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }

        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);

    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}
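
The stream-name extraction inside map() above walks a relative path upward until it reaches the child of the "streams" directory. Below is a standalone sketch of that traversal, using hypothetical paths to show both the mirror-service and merge-service layouts described in the comments.

import org.apache.hadoop.fs.Path;

public class StreamNameFromPath {

    /** Walks up the path until its parent is named "streams", mirroring the mapper's logic. */
    static String streamName(Path relativePath) {
        if (relativePath.depth() > 2) {
            // mirror-service layout, e.g. /conduit/streams/<streamName>/2013/09/12
            Path tmp = relativePath;
            while (tmp.getParent() != null && !tmp.getParent().getName().equals("streams")) {
                tmp = tmp.getParent();
            }
            return tmp.getName();
        }
        // merge-service layout, e.g. /<streamName>/filename.gz
        return relativePath.getParent().getName();
    }

    public static void main(String[] args) {
        System.out.println(streamName(new Path("/conduit/streams/clicks/2013/09/12"))); // prints "clicks"
        System.out.println(streamName(new Path("/clicks/part-00000.gz")));              // prints "clicks"
    }
}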