Example usage for org.apache.hadoop.tools DistCp DistCp

Introduction

This page collects example usages of the org.apache.hadoop.tools.DistCp constructor DistCp(Configuration, DistCpOptions), gathered from open-source projects.

Prototype

public DistCp(Configuration configuration, DistCpOptions inputOptions) throws Exception 

Document

Public Constructor.
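
Creates a DistCp instance from a Hadoop Configuration and a set of DistCpOptions; calling execute() on the result submits the copy as a MapReduce job. Below is a minimal sketch of the constructor in isolation. The cluster URIs and paths are placeholders, and the two-argument DistCpOptions constructor shown is the same one the examples on this page use (newer Hadoop releases replaced it with a builder):

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

public class DistCpConstructorExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder paths; substitute real source and target cluster URIs.
        Path source = new Path("hdfs://source-nn:8020/data/input");
        Path target = new Path("hdfs://target-nn:8020/data/output");
        DistCpOptions options = new DistCpOptions(Collections.singletonList(source), target);

        // The constructor takes the configuration and options; execute()
        // submits the copy job and returns a Job handle for inspection.
        DistCp distCp = new DistCp(conf, options);
        Job job = distCp.execute();
        System.out.println("DistCp job state: " + job.getStatus().getState());
    }
}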

Usage

From source file: com.inmobi.databus.distcp.DistcpBaseService.java

License: Apache License

protected Boolean executeDistCp(DistCpOptions options) throws Exception {
    // Run DistCp with the options supplied by the derived service, using
    // the destination cluster's Hadoop configuration.
    Configuration conf = destCluster.getHadoopConf();
    DistCp distCp = new DistCp(conf, options);
    try {
        distCp.execute();
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
        throw e;
    }
    return true;
}

From source file: com.pinterest.terrapin.hadoop.BaseUploader.java

License: Apache License

@VisibleForTesting
protected DistCp getDistCp(Configuration conf, DistCpOptions options) throws Exception {
    return new DistCp(conf, options);
}
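
The factory method here is the whole point of the snippet: marking it @VisibleForTesting lets unit tests override getDistCp and hand back a mock, so BaseUploader's surrounding logic can be exercised without constructing a real DistCp against a live cluster.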

From source file: com.thinkbiganalytics.nifi.v2.hdfs.DistCopyHDFS.java

License: Apache License

/**
 * method to construct a new DistCp object to perform the distcp
 *
 * @param pathsList   A list of paths to be recursively copied from one cluster to another
 * @param destination The root location on the target cluster
 * @return a DistCp object
 * @throws Exception if the construction of the {@link DistCp} object fails for any reason
 */
protected DistCp getDistCp(List<Path> pathsList, Path destination) throws Exception {
    final Configuration conf = getConfiguration();
    DistCpOptions opts = new DistCpOptions(pathsList, destination);
    return new DistCp(conf, opts);
}

From source file: de.tiqsolutions.hdfs.HadoopFileSystemProvider.java

License: Apache License

private void remoteCopy(Path source, Path target, CopyOption... options) throws IOException {
    Configuration configuration = getConfiguration();
    Path tmp = target.getParent();
    Path dest = null;
    do {
        dest = tmp.resolve(String.format("tmp%s/", System.currentTimeMillis()));
    } while (Files.exists(dest));
    try {
        DistCpOptions distCpOptions = new DistCpOptions(
                Arrays.asList(((HadoopFileSystemPath) source).getPath()),
                ((HadoopFileSystemPath) dest).getPath());
        List<CopyOption> optionList = Arrays.asList(options);

        distCpOptions.setOverwrite(optionList.contains(StandardCopyOption.REPLACE_EXISTING));
        try {
            DistCp distCp = new DistCp(configuration, distCpOptions);
            Job job = distCp.execute();
            job.waitForCompletion(true);
        } catch (Exception e) {
            throw new IOException(e.getLocalizedMessage(), e);
        }
        move(dest.resolve(source.getFileName()), target, options);
    } finally {
        delete(dest, false);
    }

}
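
The pattern above is worth noting: the data is first copied into a freshly generated temporary directory (the do/while loop guarantees an unused name), and only after the DistCp job completes is the result moved to the real target, so a reader of the target path never sees a half-copied tree. The finally block removes the temporary directory whether the copy succeeded or not.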

From source file: org.apache.falcon.hive.util.EventUtils.java

License: Apache License

public void invokeCopy() throws Exception {
    DistCpOptions options = getDistCpOptions();
    DistCp distCp = new DistCp(conf, options);
    LOG.info("Started DistCp with source Path: {} \ttarget path: {}", sourceStagingUri, targetStagingUri);

    Job distcpJob = distCp.execute();
    LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
    LOG.info("Completed DistCp");
    if (distcpJob.getStatus().getState() == JobStatus.State.SUCCEEDED) {
        countersMap = HiveDRUtils.fetchReplicationCounters(conf, distcpJob);
    }
}

From source file: org.apache.falcon.replication.FeedReplicator.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    CommandLine cmd = getCommand(args);

    Configuration conf = this.getConf();
    // inject wf configs
    Path confPath = new Path("file:///" + System.getProperty("oozie.action.conf.xml"));

    LOG.info("{} found conf ? {}", confPath, confPath.getFileSystem(conf).exists(confPath));
    conf.addResource(confPath);

    String includePathConf = conf.get("falcon.include.path");
    final boolean includePathSet = (includePathConf != null) && !IGNORE.equalsIgnoreCase(includePathConf);

    DistCpOptions options = getDistCpOptions(cmd, includePathSet);

    String availabilityFlagOpt = cmd.getOptionValue("availabilityFlag");
    if (StringUtils.isEmpty(availabilityFlagOpt)) {
        availabilityFlagOpt = "NA";
    }
    String availabilityFlag = EntityUtil.SUCCEEDED_FILE_NAME;
    if (cmd.getOptionValue("falconFeedStorageType").equals(Storage.TYPE.FILESYSTEM.name())) {
        availabilityFlag = "NA".equals(availabilityFlagOpt) ? availabilityFlag : availabilityFlagOpt;
    }

    conf.set("falcon.feed.availability.flag", availabilityFlag);
    DistCp distCp = (includePathSet) ? new CustomReplicator(conf, options) : new DistCp(conf, options);
    LOG.info("Started DistCp with options :" + options);
    Job job = distCp.execute();

    if (cmd.hasOption("counterLogDir") && job.getStatus().getState() == JobStatus.State.SUCCEEDED) {
        LOG.info("Gathering counters for the the Feed Replication job");
        Path counterFile = new Path(cmd.getOptionValue("counterLogDir"), "counter.txt");
        JobCounters fsReplicationCounters = JobCountersHandler.getCountersType(JobType.FSREPLICATION.name());
        if (fsReplicationCounters != null) {
            fsReplicationCounters.obtainJobCounters(conf, job, true);
            fsReplicationCounters.storeJobCounters(conf, counterFile);
        }
    }

    if (includePathSet) {
        executePostProcessing(conf, options); // this only applies for FileSystem Storage.
    }

    LOG.info("Completed DistCp");
    return 0;
}
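
Note how the constructor doubles as an extension point here: because CustomReplicator is assigned to a DistCp reference and built with the same (conf, options) pair, it must be a DistCp subclass, and Falcon swaps it in only when falcon.include.path is set, presumably to filter what gets copied.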

From source file: org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java

License: Apache License

protected void invokeCopy(String sourceStorageUrl, String targetStorageUrl, DistributedFileSystem sourceFs,
        DistributedFileSystem targetFs, String sourceDir, String targetDir, String currentSnapshotName)
        throws FalconException {
    try {
        Configuration jobConf = this.getConf();
        DistCpOptions options = getDistCpOptions(sourceStorageUrl, targetStorageUrl, sourceFs, targetFs,
                sourceDir, targetDir, currentSnapshotName);
        DistCp distCp = new DistCp(jobConf, options);
        LOG.info("Started Snapshot based DistCp from {} to {} ", getStagingUri(sourceStorageUrl, sourceDir),
                getStagingUri(targetStorageUrl, targetDir));
        Job distcpJob = distCp.execute();
        LOG.info("Distp Hadoop job: {}", distcpJob.getJobID().toString());
        LOG.info("Completed Snapshot based DistCp");

    } catch (FalconException fe) {
        throw fe;
    } catch (Exception e) {
        throw new FalconException("Unable to replicate HDFS directory using snapshots.", e);
    }
}
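
One detail in the error handling above: FalconException is caught and rethrown before the generic Exception handler, which keeps failures that are already FalconExceptions from being wrapped a second time.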