Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.

Usage

From source file:org.dutir.lucene.io.HadoopUtility.java

License:Mozilla Public License

/**
 * Stages the jars reported by {@code findJarFiles} for the {@code CLASSPATH} environment
 * variable and the {@code java.class.path} system property, and adds each staged copy to
 * the job's distributed-cache classpath.
 *
 * <p>Each jar is copied to the path returned by {@code makeTemporaryFile} on the file
 * system obtained from {@code FileSystem.get(jobConf)}. Symlink creation is enabled on the
 * job once all jars have been registered.
 *
 * @param jobConf the job configuration that receives the classpath entries
 * @throws IOException if a helper or the copy to the job file system fails
 */
protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    final String envClassPath = System.getenv().get("CLASSPATH");
    final String jvmClassPath = System.getProperty("java.class.path");
    final String[] jars = findJarFiles(new String[] { envClassPath, jvmClassPath });

    for (int i = 0; i < jars.length; i++) {
        // Jar locations are local paths, so address them through the file:// scheme.
        final Path localJar = new Path("file:///" + jars[i]);
        final Path stagedJar = makeTemporaryFile(jobConf, localJar.getName());
        FileSystem.get(jobConf).copyFromLocalFile(localJar, stagedJar);
        DistributedCache.addFileToClassPath(stagedJar, jobConf);
    }

    DistributedCache.createSymlink(jobConf);
}

From source file:org.gbif.ocurrence.index.solr.SolrOutputFormat.java

License:Apache License

/**
 * Zips the given Solr home directory, uploads the archive under {@code /tmp} on the file
 * system obtained from {@code FileSystem.get(jobConf)}, and registers it as a
 * distributed-cache archive for the job.
 *
 * <p>The generated archive name is stored in the configuration under {@code ZIP_NAME} and
 * the uploaded path under {@code SETUP_OK}. The local temporary zip is removed once the
 * upload has been attempted, whether or not it succeeded.
 *
 * @param solrHome an existing local directory holding the Solr home
 * @param jobConf the job configuration to update
 * @throws IOException if {@code solrHome} is null, missing or not a directory, or if
 *         zipping or uploading fails
 */
public static void setupSolrHomeCache(File solrHome, Configuration jobConf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }

    File tmpZip = File.createTempFile("solr", "zip");
    Path zipPath;
    FileSystem fs;
    try {
        createZip(solrHome, tmpZip);
        // Make a reasonably unique name for the zip file in the distributed cache
        // to avoid collisions if multiple jobs are running.
        String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
        jobConf.set(ZIP_NAME, hdfsZipName);

        zipPath = new Path("/tmp", getZipName(jobConf));
        fs = FileSystem.get(jobConf);
        fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    } finally {
        // The local zip is only an upload staging file; do not leave it behind.
        if (!tmpZip.delete()) {
            tmpZip.deleteOnExit();
        }
    }

    // The URI fragment names the link under which the archive is exposed to tasks.
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf));

    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(jobConf)));
    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}

From source file:org.hdl.caffe.yarn.app.Client.java

License:Apache License

/**
 * Resolves the destination {@code <home>/<APP_NAME>/<appId>/<dstFileName>} on the given
 * file system and, when a source path is supplied, copies the local file there.
 *
 * <p>The copy is skipped when {@code srcFilePath} is {@code null}; the destination is
 * still logged and returned in that case.
 *
 * @param fs the target file system
 * @param appId the application id used as a path component
 * @param srcFilePath the local file to upload, or {@code null} to skip the copy
 * @param dstFileName the file name to use at the destination
 * @return the destination path as a string
 * @throws IOException if the copy fails
 */
private String copyLocalFileToDfs(FileSystem fs, String appId, String srcFilePath, String dstFileName)
        throws IOException {
    final String relative = CaffeYarnConstants.APP_NAME + "/" + appId + "/" + dstFileName;
    final Path target = new Path(fs.getHomeDirectory(), relative);

    final boolean hasSource = srcFilePath != null;
    if (hasSource) {
        fs.copyFromLocalFile(new Path(srcFilePath), target);
    }

    final String targetName = target.toString();
    LOG.info("Copy " + srcFilePath + " to " + targetName);
    return targetName;
}

From source file:org.hdl.tensorflow.yarn.util.Utils.java

License:Apache License

/**
 * Copies a local file to {@code <home>/<DEFAULT_APP_NAME>/<appId>/<dstFileName>} on the
 * given file system.
 *
 * @param fs the target file system
 * @param appId the application id used as a path component
 * @param srcPath the local file to upload
 * @param dstFileName the file name to use at the destination
 * @return the destination path
 * @throws IOException if the copy fails
 */
public static Path copyLocalFileToDfs(FileSystem fs, String appId, Path srcPath, String dstFileName)
        throws IOException {
    final Path home = fs.getHomeDirectory();
    final String relative = new StringBuilder()
            .append(Constants.DEFAULT_APP_NAME)
            .append(Path.SEPARATOR)
            .append(appId)
            .append(Path.SEPARATOR)
            .append(dstFileName)
            .toString();
    final Path destination = new Path(home, relative);

    LOG.info("Copying " + srcPath + " to " + destination);
    fs.copyFromLocalFile(srcPath, destination);
    return destination;
}

From source file:org.icgc.dcc.submission.validation.key.KVTestUtils.java

License:Open Source License

/**
 * Copies every entry of a local directory into {@code targetDir} on the given file system,
 * keeping each entry's name.
 *
 * @param fileSystem the destination file system
 * @param sourceDir the local directory whose entries are copied
 * @param targetDir the destination directory
 * @throws IOException if {@code sourceDir} cannot be listed (it is not a directory or an
 *         I/O error occurred) or if a copy fails
 */
public static void copyDirectory(FileSystem fileSystem, File sourceDir, Path targetDir) throws IOException {
    // File.listFiles() signals "not a directory" and I/O errors by returning null.
    File[] files = sourceDir.listFiles();
    if (files == null) {
        throw new IOException("Cannot list files of '" + sourceDir + "': not a directory or I/O error");
    }

    for (val file : files) {
        val source = new Path(file.toURI());
        val target = new Path(targetDir, file.getName());

        log.info("Copying file: from '{}' to '{}'", source, target);
        fileSystem.copyFromLocalFile(source, target);
    }
}

From source file:org.janusgraph.hadoop.config.job.AbstractDistCacheConfigurer.java

License:Apache License

/**
 * Uploads {@code localPath} to {@code <destFS home>/<HDFS_TMP_LIB_DIR>/<file name>} unless
 * the comparison done by {@code compareModtimes} reports the remote copy as current.
 *
 * <p>When both file systems are equal nothing is uploaded and {@code localPath} is
 * returned. When the modification-time comparison fails with an {@link IOException}, the
 * failure is logged, the upload is skipped and the destination path is still returned.
 *
 * @param localFS the file system holding {@code localPath}
 * @param localPath the file to upload
 * @param destFS the file system to upload to
 * @return {@code localPath} if both file systems are equal, otherwise the destination path
 * @throws IOException if the copy or the modification-time update fails
 */
protected Path uploadFileIfNecessary(FileSystem localFS, Path localPath, FileSystem destFS) throws IOException {

    // Fast path for local FS -- DistributedCache + local JobRunner seems copy/link files automatically
    if (destFS.equals(localFS)) {
        log.debug("Skipping file upload for {} (destination filesystem {} equals local filesystem)", localPath,
                destFS);
        return localPath;
    }

    Path destPath = new Path(destFS.getHomeDirectory() + "/" + HDFS_TMP_LIB_DIR + "/" + localPath.getName());

    Stats fileStats = null;

    try {
        fileStats = compareModtimes(localFS, localPath, destFS, destPath);
    } catch (IOException e) {
        // Pass the exception as the trailing argument so the cause is not lost.
        log.warn("Unable to read or stat file: localPath={}, destPath={}, destFS={}", localPath, destPath,
                destFS, e);
    }

    if (fileStats != null && !fileStats.isRemoteCopyCurrent()) {
        log.debug("Copying {} to {}", localPath, destPath);
        destFS.copyFromLocalFile(localPath, destPath);
        if (null != fileStats.local) {
            // Carry the local modification time over to the uploaded copy.
            final long mtime = fileStats.local.getModificationTime();
            log.debug("Setting modtime on {} to {}", destPath, mtime);
            destFS.setTimes(destPath, mtime, -1); // -1 means leave atime alone
        }
    }

    return destPath;
}

From source file:org.kitesdk.cli.commands.TestCreateDatasetWithExistingData.java

License:Apache License

/**
 * Builds the fixtures for this test class: writes a small CSV sample, runs the CLI commands
 * that derive a schema, create a Parquet dataset and import the sample, then copies one of
 * the resulting {@code .parquet} files into the existing-data directories.
 *
 * @throws Exception if any step fails, including when no {@code .parquet} file is found
 */
@BeforeClass
public static void createDatasetFromCSV() throws Exception {
    String sample = "target/users.csv";
    String avsc = "target/user.avsc";
    BufferedWriter writer = Files.newWriter(new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
    try {
        writer.append("id,username,email\n");
        writer.append("1,test,test@example.com\n");
        writer.append("2,user,user@example.com\n");
    } finally {
        // Release the file handle even if a write fails.
        writer.close();
    }

    TestUtil.run("delete", "dataset:file:target/data/users");
    TestUtil.run("-v", "csv-schema", sample, "-o", avsc, "--class", "User");
    TestUtil.run("-v", "create", "dataset:file:target/data/users", "-s", avsc, "-f", "parquet");
    TestUtil.run("-v", "csv-import", sample, "dataset:file:target/data/users");

    USER_SCHEMA = Schemas.fromAvsc(new File(avsc));

    FileSystem fs = LocalFileSystem.getInstance();
    FileStatus[] stats = fs.listStatus(new Path("target/data/users"));
    Path parquetFile = null;
    for (FileStatus stat : stats) {
        if (stat.getPath().toString().endsWith(".parquet")) {
            parquetFile = stat.getPath();
            break;
        }
    }
    // Fail with a clear message instead of passing a null source path to the copy below.
    if (parquetFile == null) {
        throw new IllegalStateException("No .parquet file found in target/data/users");
    }

    // make a directory with the Parquet file
    fs.mkdirs(existingDataPath);
    fs.copyFromLocalFile(parquetFile, existingDataPath);
    fs.mkdirs(existingPartitionedPathWithPartition);
    fs.copyFromLocalFile(parquetFile, existingPartitionedPathWithPartition);
}

From source file:org.kitesdk.tools.JobClasspathHelper.java

License:Apache License

/**
 * Uploads a local jar to the given file system and writes its MD5 file next to it.
 *
 * <p>Remote files whose name starts with {@code JarFinder.TMP_HADOOP} are registered with
 * the file system for deletion on exit.
 *
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 *
 * @throws IOException
 *             if the upload, the MD5 file creation or the delete-on-exit registration fails.
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {

    final String from = localJarPath.toUri().toASCIIString();
    final String to = remoteJarPath.toUri().toASCIIString();
    LOG.info("Copying {} to {}", from, to);

    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // The MD5 file is written alongside the uploaded jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // Temporary files created by JarFinder must be cleaned up after the JVM exits;
    // the same rule applies to the jar and to its MD5 file.
    for (Path remote : new Path[] { remoteJarPath, remoteMd5Path }) {
        if (remote.getName().startsWith(JarFinder.TMP_HADOOP)) {
            fs.deleteOnExit(remote);
        }
    }
}

From source file:org.mrgeo.utils.DependencyLoader.java

License:Apache License

/**
 * Makes sure {@code file} is available under {@code hdfsBase} and adds it to the
 * distributed-cache classpath, unless its target path is already recorded in
 * {@code existing}.
 *
 * <p>The local file is uploaded only when the target does not exist yet or is older than
 * the local file. The target path is added to {@code existing} once it is registered.
 *
 * @param conf the configuration that receives the classpath entry
 * @param existing string forms of the target paths already registered; updated in place
 * @param fs the file system holding {@code hdfsBase}
 * @param hdfsBase the directory the file is uploaded into
 * @param file the local file to distribute
 * @throws IOException if a file system operation fails
 */
private static void addFileToClasspath(Configuration conf, Set<String> existing, FileSystem fs, Path hdfsBase,
        File file) throws IOException {
    final Path target = new Path(hdfsBase, file.getName());
    final String targetKey = target.toString();
    if (existing.contains(targetKey)) {
        return;
    }

    // The remote copy counts as current when it exists and is at least as new as the local file.
    boolean remoteIsCurrent = false;
    if (fs.exists(target)) {
        final FileStatus remoteStatus = fs.getFileStatus(target);
        remoteIsCurrent = file.lastModified() <= remoteStatus.getModificationTime();
    }

    if (remoteIsCurrent) {
        log.debug(file.getPath() + " up to date");
    } else {
        log.debug("Copying " + file.getPath() + " to HDFS for distribution");
        fs.copyFromLocalFile(new Path(file.getCanonicalFile().toURI()), target);
    }

    DistributedCache.addFileToClassPath(target, conf, fs);
    existing.add(targetKey);
}

From source file:org.notmysock.tez.BroadcastTest.java

License:Apache License

/**
 * Runs the broadcast test DAG in a Tez session and reports whether it succeeded.
 *
 * <p>The method uploads the current jar as {@code job.jar} into a per-run staging
 * directory, starts a Tez session, submits the DAG built by {@code createDAG} and waits for
 * it to finish. The staging directory is deleted and the session stopped afterwards; the
 * session is stopped even if deleting the staging directory fails.
 *
 * @param conf base configuration, or {@code null} to start from a default
 *        {@code TezConfiguration}
 * @param doLocalityCheck not used by this method
 * @return {@code true} if the DAG finished in state {@code SUCCEEDED}, {@code false} otherwise
 * @throws Exception if set-up, submission, monitoring or clean-up fails
 */
public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
    System.out.println("Running BroadcastTest");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);

    Path jobJar = new Path(stagingDir, "job.jar");
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put("job.jar", createLocalResource(fs, jobJar));

    // needs session or else TaskScheduler does not hold onto containers
    TezClient tezSession = TezClient.create("BroadcastTest", tezConf);
    tezSession.addAppMasterLocalFiles(localResources);
    tezSession.start();

    try {
        DAG dag = createDAG(fs, tezConf, stagingDir, localResources);

        dag.addTaskLocalFiles(localResources);

        tezSession.waitTillReady();
        DAGClient dagClient = tezSession.submitDAG(dag);

        // monitoring
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        // Stop the session even when removing the staging directory throws.
        try {
            fs.delete(stagingDir, true);
        } finally {
            tezSession.stop();
        }
    }
}