Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileUtil.copy.

Prototype

public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst,
        boolean deleteSource, boolean overwrite, Configuration conf) throws IOException 

Document

Copy files between FileSystems.
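
Below is a minimal, hypothetical sketch of calling this overload. The paths, the choice of the default FileSystem as source, and the local file system as destination are placeholder assumptions for illustration only, not taken from any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Source: the default (e.g. HDFS) file system; destination: the local file system.
        FileSystem srcFs = FileSystem.get(conf);
        FileSystem dstFs = FileSystem.getLocal(conf);

        // Hypothetical paths, used purely for illustration.
        FileStatus srcStatus = srcFs.getFileStatus(new Path("/tmp/input.txt"));
        Path dst = new Path("/tmp/input-copy.txt");

        // Copy without deleting the source, overwriting the destination if it exists.
        boolean copied = FileUtil.copy(srcFs, srcStatus, dstFs, dst, false, true, conf);
        System.out.println("Copy succeeded: " + copied);
    }
}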

Usage

From source file:RawParascaleFileSystem.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public void copyFromLocalFile(final boolean delSrc, final boolean overwrite, final Path[] srcs, final Path dst)
        throws IOException {
    final Configuration conf = getConf();
    FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf);
}

From source file:com.baifendian.swordfish.common.hadoop.HdfsClient.java

License:Apache License

/**
 * Copy a file or directory on HDFS.
 *
 * @param srcPath      HDFS source path
 * @param dstPath      HDFS destination path
 * @param deleteSource whether to delete the source after copying
 * @param overwrite    whether to overwrite the destination
 * @return true if the copy succeeded
 */
public boolean copy(String srcPath, String dstPath, boolean deleteSource, boolean overwrite)
        throws HdfsException {
    Path srcPathObj = new Path(srcPath);
    Path dstPathObj = new Path(dstPath);

    try {
        return FileUtil.copy(fileSystem, srcPathObj, fileSystem, dstPathObj, deleteSource, overwrite,
                fileSystem.getConf());
    } catch (IOException e) {
        LOGGER.error("Copy exception", e);
        throw new HdfsException("Copy exception", e);
    }
}

From source file:com.cloudera.recordbreaker.analyzer.AvroDataDescriptor.java

License:Open Source License

public void prepareAvroFile(FileSystem srcFs, FileSystem dstFs, Path dst, Configuration conf)
        throws IOException {
    FileUtil.copy(srcFs, getFilename(), dstFs, dst, false, true, conf);
}

From source file:com.datasalt.pangool.utils.HadoopUtils.java

License:Apache License

public static void synchronize(FileSystem fS1, Path p1, FileSystem fS2, Path p2) throws IOException {
    deleteIfExists(fS2, p2);
    FileUtil.copy(fS1, p1, fS2, p2, false, false, fS1.getConf());
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException {
    StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16));
    for (String localFile : files) {
        Path src = new Path(localFile);
        String filename = src.getName();
        Path dst = new Path(basePath, filename);
        URI localFileURI = null;
        try {
            localFileURI = new URI(localFile);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) {
            LOG.info("Copy {} from local filesystem to {}", localFile, dst);
            fs.copyFromLocalFile(false, true, src, dst);
        } else {
            LOG.info("Copy {} from DFS to {}", localFile, dst);
            FileUtil.copy(fs, src, fs, dst, false, true, conf);
        }
        if (csv.length() > 0) {
            csv.append(LIB_JARS_SEP);
        }
        csv.append(dst.toString());
    }
    return csv.toString();
}

From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public File handleFileResult(Path path) {
    FileSystem fs = null;
    try {
        Path outputPartPath = new Path(path, "part-r-00000");
        // copy file from HDFS to local temporary file
        Logger.getLogger(FeaturesByFilterPlugin.class.getName())
                .info("Source file is " + outputPartPath.toString());
        Configuration conf = new Configuration();

        HBaseStorage.configureHBaseConfig(conf);

        HBaseConfiguration.addHbaseResources(conf);
        fs = FileSystem.get(conf);
        File createTempFile = File.createTempFile("vcf", "out");

        createTempFile.delete();
        Path outPath = new Path(createTempFile.toURI());
        FileSystem localSystem = FileSystem.get(new Configuration());

        Logger.getLogger(FeaturesByFilterPlugin.class.getName())
                .info("Destination file is " + outPath.toString());
        if (!fs.exists(outputPartPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input file not found");
        }

        if (!fs.isFile(outputPartPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input should be a file");
        }

        if (localSystem.exists(outPath)) {
            Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Output already exists");
        }
        // doesn't quite work yet, no time to finish before poster, check results manually on hdfs

        FileUtil.copy(fs, outputPartPath, localSystem, outPath, true, true, conf);
        return new File(outPath.toUri());
    } catch (IOException ex) {
        Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex);
    } finally {
        if (fs != null) {
            try {
                fs.delete(path, true);
            } catch (IOException ex) {
                Logger.getLogger(VCFDumperPlugin.class.getName())
                        .warn("IOException when clearing after text output", ex);
            }
        }
    }

    return null;
}

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

public static void copyFileOnHDFS(String originalDir, String newDir) throws IOException {
    Path originalPath = new Path(originalDir);
    Path newPath = new Path(newDir);
    boolean deleteSource = false;
    boolean overwrite = true;

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(originalPath)) {
        FileUtil.copy(fs, originalPath, fs, newPath, deleteSource, overwrite, job);
    }
}

From source file:com.ibm.bi.dml.yarn.DMLYarnClient.java

License:Open Source License

/**
 * Copies the DML config file, the DML script, and the application jar into the given HDFS working directory.
 *
 * @param yconf the YARN configuration
 * @param hdfsWD the HDFS working directory
 * @throws ParseException
 * @throws IOException
 * @throws DMLRuntimeException
 * @throws InterruptedException
 */
@SuppressWarnings("deprecation")
private void copyResourcesToHdfsWorkingDir(YarnConfiguration yconf, String hdfsWD)
        throws ParseException, IOException, DMLRuntimeException, InterruptedException {
    FileSystem fs = FileSystem.get(yconf);

    //create working directory
    MapReduceTool.createDirIfNotExistOnHDFS(hdfsWD, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);

    //serialize the dml config to HDFS file 
    //NOTE: we do not modify and ship the absolute scratch space path of the current user
    //because this might result in permission issues if the app master is run with a different user
    //(runtime plan migration during resource reoptimizations now needs to use qualified names
    //for shipping/reading intermediates) TODO modify resource reoptimizer on prototype integration.
    Path confPath = new Path(hdfsWD, DML_CONFIG_NAME);
    FSDataOutputStream fout = fs.create(confPath, true);
    //_dmlConfig.makeQualifiedScratchSpacePath(); 
    fout.writeBytes(_dmlConfig.serializeDMLConfig() + "\n");
    fout.close();
    _hdfsDMLConfig = confPath.makeQualified(fs).toString();
    LOG.debug("DML config written to HDFS file: " + _hdfsDMLConfig + "");

    //serialize the dml script to HDFS file
    Path scriptPath = new Path(hdfsWD, DML_SCRIPT_NAME);
    FSDataOutputStream fout2 = fs.create(scriptPath, true);
    fout2.writeBytes(_dmlScript);
    fout2.close();
    _hdfsDMLScript = scriptPath.makeQualified(fs).toString();
    LOG.debug("DML script written to HDFS file: " + _hdfsDMLScript + "");

    // copy local jar file to HDFS (try to get the original jar filename)
    String fname = getLocalJarFileNameFromEnvConst();
    if (fname == null) {
        //get location of unpacked jar classes and repackage (if required)
        String lclassFile = DMLYarnClient.class.getProtectionDomain().getCodeSource().getLocation().getPath()
                .toString();
        File flclassFile = new File(lclassFile);
        if (!flclassFile.isDirectory()) //called w/ jar 
            fname = lclassFile;
        else //called w/ unpacked jar (need to be repackaged)   
            fname = createJar(lclassFile);
    }
    Path srcPath = new Path(fname);
    Path dstPath = new Path(hdfsWD, srcPath.getName());
    FileUtil.copy(FileSystem.getLocal(yconf), srcPath, fs, dstPath, false, true, yconf);
    _hdfsJarFile = dstPath.makeQualified(fs).toString();
    LOG.debug(
            "Jar file copied from local file: " + srcPath.toString() + " to HDFS file: " + dstPath.toString());
}

From source file:com.twitter.hraven.etl.JobFilePartitioner.java

License:Apache License

/**
 * @param hdfs FileSystem handle
 * @param f to copy
 * @param conf configuration to use for copying.
 * @param targetDir directory to copy said file to.
 * @throws IOException
 */
private void copy(FileSystem hdfs, FileStatus f, Configuration conf, Path targetDir) throws IOException {
    long startNanos = System.nanoTime();
    FileUtil.copy(hdfs, f.getPath(), hdfs, targetDir, false, true, conf);
    long estimatedTimeNanos = System.nanoTime() - startNanos;
    // Nanos are 10^-9, millis 10^-3
    long durationMillis = estimatedTimeNanos / 1000000;
    if (durationMillis > 3000) {
        String msg = "It took " + durationMillis / 1000 + " seconds to copy " + f.getPath().getName() + " of "
                + f.getLen() + " bytes.";
        LOG.warn(msg);
    }
}

From source file:com.yata.core.HDFSManager.java

License:Apache License

/**
 *
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile)
        throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {

        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);

        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    try {

        FileUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {

        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218", "copyHDFSData@" + className
                + " : IOException while Changing HDFS File Permissions - EXITING...");
    }

}