Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk. It is added to the file system at the given dst name, and the local source is kept intact afterwards.
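
The following is a minimal, self-contained sketch of the call above, assuming a local file at /tmp/data.txt and a writable destination directory on the file system configured by fs.defaultFS (both paths are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        // Illustrative paths only; substitute real locations.
        Path src = new Path("/tmp/data.txt"); // source file on the local disk
        Path dst = new Path("/user/hadoop/data.txt"); // destination on the target file system

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf); // file system named by fs.defaultFS
        fs.copyFromLocalFile(src, dst); // the local source is kept intact
        fs.close();
    }
}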

Usage

From source file: com.cloudera.cdk.tools.JobClasspathHelper.java

License: Apache License

/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 * 
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {

    logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(),
            remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // we need to clean the tmp files that are created by JarFinder after the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}

From source file: com.cloudera.flume.PerfHdfsIO.java

License: Apache License

@Test
public void testCopy() throws IOException, InterruptedException {

    Benchmark b = new Benchmark("hdfs seqfile copy");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    File tmp = File.createTempFile("test", "tmp");
    tmp.deleteOnExit();
    SeqfileEventSink sink = new SeqfileEventSink(tmp);
    sink.open();
    b.mark("localdisk_write_started");

    EventUtil.dumpAll(mem, sink);

    b.mark("local_disk_write done");

    sink.close();

    FlumeConfiguration conf = FlumeConfiguration.get();
    Path src = new Path(tmp.getAbsolutePath());
    Path dst = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = dst.getFileSystem(conf);
    hdfs.deleteOnExit(dst);

    b.mark("hdfs_copy_started");
    hdfs.copyFromLocalFile(src, dst);
    b.mark("hdfs_copy_done");
    hdfs.close();
    b.done();
}

From source file: com.cloudera.livy.test.jobs.SQLGetTweets.java

License: Apache License

@Override
public List<String> call(JobContext jc) throws Exception {
    InputStream source = getClass().getResourceAsStream("/testweet.json");

    // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem).
    URI input;
    File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir());
    Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING);
    FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration());
    if ("file".equals(fs.getUri().getScheme())) {
        input = local.toURI();
    } else {
        String uuid = UUID.randomUUID().toString();
        Path target = new Path("/tmp/" + uuid + "-tweets.json");
        fs.copyFromLocalFile(new Path(local.toURI()), target);
        input = target.toUri();
    }

    SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx();
    DataFrame df = sqlctx.jsonFile(input.toString());
    df.registerTempTable("tweets");

    DataFrame topTweets = sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");
    List<String> tweetList = new ArrayList<>();
    for (Row r : topTweets.collect()) {
        tweetList.add(r.toString());
    }
    return tweetList;
}

From source file: com.collective.celos.ci.deploy.HdfsDeployer.java

License: Apache License

public void deploy() throws Exception {

    FileSystem fs = context.getFileSystem();
    final String hdfsDirLocalPath = String.format(LOCAL_HDFS_PATTERN, context.getDeployDir());

    final File hdfsDirLocal = new File(hdfsDirLocalPath);
    if (!hdfsDirLocal.exists()) {
        throw new IllegalStateException(hdfsDirLocalPath + " not found on the local FS");
    }

    undeploy();

    Path dst = getDestinationHdfsPath();
    fs.mkdirs(dst);
    String[] childFiles = hdfsDirLocal.list();
    for (String child : childFiles) {
        fs.copyFromLocalFile(new Path(hdfsDirLocalPath, child), dst);
    }
}

From source file: com.datasalt.pangool.solr.TupleSolrOutputFormat.java

License: Apache License

private void setupSolrHomeCache(File solrHome, Configuration conf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    localSolrHome = solrHome.getAbsolutePath();
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    zipName = hdfsZipName;

    Path zipPath = new Path("/tmp", zipName);
    FileSystem fs = FileSystem.get(conf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + zipName);

    DistributedCache.addCacheArchive(baseZipUrl, conf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(conf)));
}

From source file: com.datatorrent.stram.client.StramClientUtils.java

License: Apache License

public static void copyFromLocalFileNoChecksum(FileSystem fs, File fromLocal, Path toDFS) throws IOException {
    // This is to prevent the hadoop FileSystem API from performing a checksum on the local file.
    // This "feature" has caused a lot of headaches because the local file can be copied from HDFS and modified,
    // and the checksum will fail if the file is copied back to HDFS.
    try {
        new File(fromLocal.getParentFile(), "." + fromLocal.getName() + ".crc").delete();
    } catch (Exception ex) {
        // ignore
    }
    fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);
}

From source file: com.epam.hadoop.nv.yarn.Client.java

License: Apache License

private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file: com.github.hdl.tensorflow.yarn.app.Client.java

License: Apache License

private String copyLocalFileToDfs(FileSystem fs, String appId, String srcFilePath, String dstFileName)
        throws IOException {
    String suffix = TFYarnConstants.APP_NAME + "/" + appId + "/" + dstFileName;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (srcFilePath != null) {
        fs.copyFromLocalFile(new Path(srcFilePath), dst);
    }
    LOG.info("Copy " + srcFilePath + " to " + dst.toString());
    return dst.toString();
}

From source file: com.github.hdl.tensorflow.yarn.app.TFContainer.java

License: Apache License

public void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {

    execCmd("pwd");
    execCmd("ls -l");
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    LOG.info("copy: " + fileSrcPath + " ===> " + dst.toString());
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }

    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(URL.fromURI(dst.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file: com.inmobi.conduit.Conduit.java

License: Apache License

private void copyInputFormatJarToClusterFS(Cluster cluster, String inputFormatSrcJar) throws IOException {
    FileSystem clusterFS = FileSystem.get(cluster.getHadoopConf());
    // create jars path inside /conduit/system/tmp path
    Path jarsPath = new Path(cluster.getTmpPath(), "jars");
    if (!clusterFS.exists(jarsPath)) {
        clusterFS.mkdirs(jarsPath);
    }
    // copy inputFormat source jar into /conduit/system/tmp/jars path
    Path inputFormatJarDestPath = new Path(jarsPath, "conduit-distcp-current.jar");
    if (clusterFS.exists(inputFormatJarDestPath)) {
        clusterFS.delete(inputFormatJarDestPath, true);
    }
    clusterFS.copyFromLocalFile(new Path(inputFormatSrcJar), inputFormatJarDestPath);
}