Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

On this page you can find example usages of the copy method of org.apache.hadoop.fs.FileUtil.

Prototype

public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        Configuration conf) throws IOException 

Document

Copy files between FileSystems.
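
Before the project examples below, here is a minimal sketch of calling this prototype, copying a file from the local file system to the default FileSystem configured for the cluster; the class name and paths are hypothetical:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopySketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // source on the local file system, destination on the default FileSystem (e.g. HDFS)
        FileSystem srcFs = FileSystem.getLocal(conf);
        FileSystem dstFs = FileSystem.get(conf);

        Path src = new Path("/tmp/example.txt");   // hypothetical source path
        Path dst = new Path("/user/example/data"); // hypothetical destination path

        // deleteSource = false keeps the source file after the copy
        boolean copied = FileUtil.copy(srcFs, src, dstFs, dst, false, conf);
        System.out.println("copy succeeded: " + copied);
    }
}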

Usage

From source file:audr.text.utils.FileUtils.java

License:Open Source License

/**
 * Uploads a local file to HDFS.
 * 
 * @param localFile
 *            path of the local source file
 * @param hadoopFile
 *            destination path on HDFS
 */
public static void uploadFile2HDFS(String localFile, String hadoopFile) {
    try {
        Configuration conf = new Configuration();
        FileSystem src = FileSystem.getLocal(conf);
        FileSystem dst = FileSystem.get(conf);
        Path srcpath = new Path(localFile);
        Path dstpath = new Path(hadoopFile);
        FileUtil.copy(src, srcpath, dst, dstpath, false, conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void putMeta() throws IOException {
    LOG.info(MessageFormat.format("setting patched properties: {0} -> {1}", storage.getPatchProperties(),
            getNextDirectory()));
    FileUtil.copy(storage.getFileSystem(), storage.getPatchProperties(), storage.getFileSystem(),
            getNextProperties(), false, storage.getConfiguration());
}

From source file:com.cloudera.flume.collector.CopyToHdfs.java

License:Apache License

public static void main(String[] argv) throws IOException {
    if (argv.length < 2) {
        System.out.println("Need to specify arguments <src> <dst>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();

    Path srcPath = new Path(argv[0]);
    FileSystem srcFs = srcPath.getFileSystem(conf);

    Path dstPath = new Path(argv[1]);
    FileSystem dstFs = dstPath.getFileSystem(conf);

    // dfs.copyFromLocalFile(false, psrc, pdst);
    FileUtil.copy(srcFs, srcPath, dstFs, dstPath, false, conf);

}

From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java

License:Apache License

@DriverInit
public void copyFilesToWorking() throws IOException {
    // Copy the input files into the 'workingDir'
    FileSystem fs = FileSystem.get(getConf());

    this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString());
    fs.mkdirs(workingDirectory);

    FileStatus[] files = fs.globStatus(new Path(context.getInput()));
    for (FileStatus file : files) {
        Path dest = new Path(workingDirectory, file.getPath().getName());
        FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf());
    }
}

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 * 
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // set the temporary folder for Pangool instances to the temporary of the user that is running the Job
        // This folder will be used across the cluster for location the instances. This way, tasktrackers
        // that are being run as different user will still be able to locate this folder
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // remove any previous copy before uploading
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
            }
        }
    }

}

From source file:com.linkedin.pinot.filesystem.HadoopPinotFS.java

License:Apache License

/**
 * Note that this method copies within a cluster. If you want to copy outside the cluster, you will
 * need to create a new configuration and filesystem. Keeps files if copy/move is partial.
 */
@Override
public boolean copy(URI srcUri, URI dstUri) throws IOException {
    Path source = new Path(srcUri);
    Path target = new Path(dstUri);
    RemoteIterator<LocatedFileStatus> sourceFiles = hadoopFS.listFiles(source, true);
    if (sourceFiles != null) {
        while (sourceFiles.hasNext()) {
            boolean succeeded = FileUtil.copy(hadoopFS, sourceFiles.next().getPath(), hadoopFS, target, true,
                    hadoopConf);
            if (!succeeded) {
                return false;
            }
        }
    }
    return true;
}

From source file:com.mvdb.etl.actions.ActionUtils.java

License:Apache License

public static void copyLocalDirectoryToHdfsDirectory(String localDirectory, String hdfsDirectory)
        throws Throwable {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);

    FileSystem localFileSystem = FileSystem.get(new org.apache.hadoop.conf.Configuration());

    Path localDirectoryPath = new Path(localDirectory);
    Path hdfsDirectoryPath = new Path(hdfsDirectory);

    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsDirectoryPath, true);
        if (deleteSuccess == false) {
            throw new RuntimeException("Unable to delete " + hdfsDirectoryPath.toString());
        }
    }
    if (!localFileSystem.exists(localDirectoryPath)) {
        throw new RuntimeException("Input directory " + localDirectoryPath + " not found");
    }
    FileStatus fileStatus1 = localFileSystem.getFileStatus(localDirectoryPath);
    if (!fileStatus1.isDir()) {
        throw new RuntimeException("Input " + localDirectoryPath + " should be a directory");
    }
    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        throw new RuntimeException("Output " + hdfsDirectoryPath + "already exists");
    }

    logger.info("Attempting Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());
    FileUtil.copy(localFileSystem, localDirectoryPath, hdfsFileSystem, hdfsDirectoryPath, false, conf);
    logger.info("-Completed Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());

}

From source file:com.pegasus.ResultInfo.java

License:Apache License

public int run(final String[] args) throws Exception {

    Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);
    edge_path = new Path(conf.get("edge_path"));
    all_vertices = new Path(conf.get("all_vertices"));
    curbm_path = new Path(conf.get("iteration_state"));
    tempbm_path = new Path(conf.get("stage1out"));
    nextbm_path = new Path(conf.get("stage2out"));
    output_path = new Path(conf.get("stage3out"));
    grapherOut_path = new Path(conf.get("grapherout"));
    nreducers = Integer.parseInt(conf.get("num_reducers"));
    local_output_path = conf.get("local_output");

    // initial cleanup
    fs.delete(tempbm_path, true);
    fs.delete(nextbm_path, true);
    fs.delete(output_path, true);
    fs.delete(curbm_path, true);
    fs.delete(grapherOut_path, true);
    FileUtil.fullyDelete(new File(local_output_path));
    fs.mkdirs(curbm_path);
    //fs.mkdirs(grapherOut_path);

    FileStatus[] statusArray = fs.listStatus(all_vertices);
    for (int index = 0; index < statusArray.length; index++) {
        Path temp = statusArray[index].getPath();
        FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
    }

    make_symmetric = 1;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

    // Iteratively calculate neighborhood function. 
    // rotate directory
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        System.out.println("configStage1");
        JobClient.runJob(configStage1());
        System.out.println("configStage2");
        JobClient.runJob(configStage2());
        System.out.println("configStage3");
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // copy neighborhood information from HDFS to local disk, and read it!
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        ResultInfo ri = readIterationOutput(new_path);

        changed_nodes[iter_counter] = ri.changed;
        unchanged_nodes[iter_counter] = ri.unchanged;

        iter_counter++;

        System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);
        fs.delete(curbm_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, curbm_path);

        // Stop when the minimum neighborhood doesn't change
        if (ri.changed == 0) {
            System.out.println("All the component ids converged. Finishing...");
            fs.rename(curbm_path, grapherOut_path);
            break;
        }
    }
    FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // finishing.
    System.out.println("\n[PEGASUS] Connected component computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    return 0;
}

From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java

License:Apache License

public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
}