Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk. Add it to the filesystem at the given dst name; the source is kept intact afterwards.
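
Below is a minimal, self-contained sketch of calling this overload directly. It assumes a Hadoop client configuration is available on the classpath; the source and destination paths are hypothetical placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFileSketch {
    public static void main(String[] args) throws Exception {
        // Resolves to the default filesystem configured via fs.defaultFS
        // (e.g. an HDFS cluster, or the local filesystem in a test setup).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical paths: a file on the local disk and its destination on the filesystem.
        Path src = new Path("/tmp/local-report.csv");
        Path dst = new Path("/user/data/reports/report.csv");

        // Copies the local file to the target filesystem; the local source is kept intact.
        fs.copyFromLocalFile(src, dst);
    }
}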

Usage

From source file: io.dstream.tez.utils.HadoopUtils.java

License: Apache License

/**
 * Copies the given resource from the local file system to the destination
 * path on the provided FileSystem, skipping the copy if the destination
 * already exists.
 */
private static synchronized void provisioinResourceToFs(FileSystem fs, Path sourcePath, Path destPath)
        throws Exception {
    if (logger.isDebugEnabled()) {
        logger.debug("Provisioning '" + sourcePath + "' to " + destPath);
    }
    if (!fs.exists(destPath)) {
        fs.copyFromLocalFile(sourcePath, destPath);
    } else {
        logger.debug("Skipping provisioning of " + destPath + " since it already exists.");
    }
}
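
The snippet above guards the copy with an exists() check so an already-provisioned resource is not copied twice. A related variation is the set of FileSystem overloads that take explicit delSrc and overwrite flags; the sketch below shows them with hypothetical paths.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFileFlagsSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // Hypothetical paths used only for illustration.
        Path src = new Path("/tmp/app.jar");
        Path dst = new Path("/user/staging/app.jar");

        // delSrc = false keeps the local source; overwrite = true replaces dst if it already exists.
        fs.copyFromLocalFile(false, true, src, dst);

        // delSrc = true deletes the local source after a successful copy.
        fs.copyFromLocalFile(true, true, src, dst);
    }
}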

From source file: io.gzinga.hadoop.TestHadoopGZipRandomAccess.java

License: Apache License

@Test
public void testGZipOutputStream() {
    try {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("target/test"));
        GZipOutputStreamRandomAccess gzip = new GZipOutputStreamRandomAccess(
                fs.create(new Path("target/test/testfile")));
        byte[] str = "This is line\n".getBytes();
        for (int i = 1; i <= 10000; i++) {
            if (i % 100 == 0) {
                gzip.addOffset(i / 100l);
            }
            gzip.write(str);
        }
        Assert.assertEquals(gzip.getOffsetMap().size(), 100);
        gzip.close();
        fs.copyFromLocalFile(new Path(fs.getWorkingDirectory().toString() + "/target/test-classes/testfile1"),
                new Path("target/test/testfile1"));
        FSDataInputStream fin = fs.open(new Path("target/test/testfile"));
        long len = fs.getFileStatus(new Path("target/test/testfile")).getLen();
        SeekableGZipDataInputStream sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertTrue(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile1"));
        sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertFalse(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile"));
        sin = new SeekableGZipDataInputStream(fin, len);
        GZipInputStreamRandomAccess gzin = new GZipInputStreamRandomAccess(sin);
        Assert.assertEquals(gzin.getMetadata().size(), 100);
        Assert.assertTrue(gzin.getMetadata().containsKey(1l));
        Assert.assertTrue(gzin.getMetadata().containsKey(100l));
        Assert.assertFalse(gzin.getMetadata().containsKey(200l));
        gzin.jumpToIndex(50l);
        int count1 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count1++;
        }
        gzin.jumpToIndex(60l);
        int count2 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count2++;
        }
        Assert.assertTrue(count1 > count2);
        gzin.close();
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}

From source file: io.hops.tensorflow.Client.java

License: Apache License

private String addResource(FileSystem fs, ApplicationId appId, String srcPath, String dstDir, String dstName,
        DistributedCacheList distCache, Map<String, LocalResource> localResources, StringBuilder pythonPath)
        throws IOException {
    Path src = new Path(srcPath);

    if (dstDir == null) {
        dstDir = ".";
    }
    if (dstName == null) {
        dstName = src.getName();
    }

    Path baseDir = new Path(fs.getHomeDirectory(), Constants.YARNTF_STAGING + "/" + appId.toString());
    String dstPath;
    if (dstDir.startsWith(".")) {
        dstPath = dstName;
    } else {
        dstPath = dstDir + "/" + dstName;
    }
    Path dst = new Path(baseDir, dstPath);

    LOG.info("Copying from local filesystem: " + src + " -> " + dst);
    fs.copyFromLocalFile(src, dst);
    FileStatus dstStatus = fs.getFileStatus(dst);

    if (distCache != null) {
        LOG.info("Adding to distributed cache: " + srcPath + " -> " + dstPath);
        distCache.add(new DistributedCacheList.Entry(dstPath, dst.toUri(), dstStatus.getLen(),
                dstStatus.getModificationTime()));
    }

    if (localResources != null) {
        LOG.info("Adding to local environment: " + srcPath + " -> " + dstPath);
        LocalResource resource = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, dstStatus.getLen(),
                dstStatus.getModificationTime());
        localResources.put(dstPath, resource);
    }

    if (pythonPath != null) {
        pythonPath.append(File.pathSeparator).append(dstPath);
    }

    return dstName;
}

From source file: jadoop.HadoopGridJob.java

License: Open Source License

/**
 * Copies the file(s) on the local machine to the temporary HDFS working
 * directory and makes them available in the hadoop distributed cache so
 * that they appear in the working directory of the HadoopGridTask(s) when
 * they are running.
 * 
 * @param fs
 *            the hadoop HDFS filesystem
 * @param hdfsDirectory
 *            the path to the temporary working directory on the HDFS to
 *            which the files are to be copied.
 * @throws IOException
 *             if there is a problem copying the files to the HDFS or adding
 *             them to the hadoop distributed cache.
 * @throws URISyntaxException
 *             if there is a problem generating the URI used to add the file
 *             to the hadoop distributed cache.
 */
private void copyLocalFileToHDFS(FileSystem fs, Path hdfsDirectory) throws IOException, URISyntaxException {

    for (File localFile : files) {
        // get the path to the file on the local file system.
        Path fileRelativePath = new Path(localFile.getPath());

        /*
         * copy the file from the local file system to the temporary working
         * directory on the HDFS.
         */
        fs.copyFromLocalFile(fileRelativePath, hdfsDirectory);

        /*
         * Build a URI to the file on the HDFS so we can add it to the
         * working cache.
         * 
         * The value before the # gives the name of the file on the HDFS,
         * the value after the # gives the name that the file will have in
         * the cache (i.e. the working directory of the tasks).
         */
        URI uri = new URI(hdfsDirectory + "/" + localFile.getName() + "#" + localFile.getName());

        job.addCacheFile(uri);
    }
}

From source file: jadoop.HadoopGridJob.java

License: Open Source License

/**
 * Copies the archive file(s) on the local machine into the temporary
 * working directory on the hadoop HDFS. It also makes them available in
 * the distributed working cache so the HadoopGridTask(s) can access them
 * in their working directory. Once the archive file(s) have been copied
 * onto the HDFS, a directory with each archive file's name will be created
 * and the contents of the archive will be unpacked into that directory.
 * @param fs
 *            the hadoop HDFS file system
 * @param hdfsDirectory
 *            path to the temporary working directory on the HDFS to which
 *            the archives are to be copied.
 * @throws IOException
 *             if there is a problem copying the archives to the HDFS or
 *             adding them to the hadoop distributed cache.
 * @throws URISyntaxException
 *             if there is a problem generating the URI used to add the
 *             archive to the hadoop distributed cache.
 */
private void copyLocalArchiveToHDFS(FileSystem fs, Path hdfsDirectory) throws IOException, URISyntaxException {
    for (File localArchive : archives) {
        Path archiveRelativePath = new Path(localArchive.getPath());

        fs.copyFromLocalFile(archiveRelativePath, hdfsDirectory);

        URI uri = new URI(hdfsDirectory + "/" + localArchive.getName() + "#" + localArchive.getName());
        job.addCacheArchive(uri);
    }
}

From source file: ml.shifu.shifu.util.CommonUtils.java

License: Apache License

/**
 * Sync up all local configuration files to HDFS.
 *
 * @throws IOException          if any exception occurs on HDFS or local IO
 * @throws NullPointerException if parameter {@code modelConfig} is null
 */
public static boolean copyConfFromLocalToHDFS(ModelConfig modelConfig) throws IOException {
    FileSystem hdfs = HDFSUtils.getFS();
    FileSystem localFs = HDFSUtils.getLocalFS();

    PathFinder pathFinder = new PathFinder(modelConfig);

    Path pathModelSet = new Path(pathFinder.getModelSetPath(SourceType.HDFS));
    // don't check whether pathModelSet exists; it should be removed by the user.
    hdfs.mkdirs(pathModelSet);

    // Copy ModelConfig
    Path srcModelConfig = new Path(pathFinder.getModelConfigPath(SourceType.LOCAL));
    Path dstModelConfig = new Path(pathFinder.getModelSetPath(SourceType.HDFS));
    hdfs.copyFromLocalFile(srcModelConfig, dstModelConfig);

    // Copy ColumnConfig
    Path srcColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.LOCAL));
    Path dstColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.HDFS));
    hdfs.copyFromLocalFile(srcColumnConfig, dstColumnConfig);

    // copy others
    Path srcVersion = new Path(pathFinder.getModelVersion(SourceType.LOCAL));
    if (localFs.exists(srcVersion)) {
        Path dstVersion = new Path(pathFinder.getModelVersion(SourceType.HDFS));
        hdfs.delete(dstVersion, true);
        hdfs.copyFromLocalFile(srcVersion, pathModelSet);
    }

    // Copy Models
    Path srcModels = new Path(pathFinder.getModelsPath(SourceType.LOCAL));
    if (localFs.exists(srcModels)) {
        Path dstModels = new Path(pathFinder.getModelsPath(SourceType.HDFS));
        hdfs.delete(dstModels, true);
        hdfs.copyFromLocalFile(srcModels, pathModelSet);
    }

    // Copy EvalSets
    Path evalsPath = new Path(pathFinder.getEvalsPath(SourceType.LOCAL));
    if (localFs.exists(evalsPath)) {
        for (FileStatus evalset : localFs.listStatus(evalsPath)) {
            EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalset.getPath().getName());
            if (evalConfig != null) {
                copyEvalDataFromLocalToHDFS(modelConfig, evalConfig.getName());
            }
        }
    }

    return true;
}

From source file: ml.shifu.shifu.util.CommonUtils.java

License: Apache License

/**
 * Sync up the evaluation data to HDFS.
 *
 * @param modelConfig
 * @param evalName
 * @throws IOException
 */
public static void copyEvalDataFromLocalToHDFS(ModelConfig modelConfig, String evalName) throws IOException {
    EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalName);
    if (evalConfig != null) {
        FileSystem hdfs = HDFSUtils.getFS();
        FileSystem localFs = HDFSUtils.getLocalFS();
        PathFinder pathFinder = new PathFinder(modelConfig);

        Path evalDir = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.LOCAL));
        Path dst = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.HDFS));
        if (localFs.exists(evalDir) // local evaluation folder exists
                && localFs.getFileStatus(evalDir).isDir() // is directory
                && !hdfs.exists(dst)) {
            hdfs.copyFromLocalFile(evalDir, dst);
        }

        if (StringUtils.isNotBlank(evalConfig.getScoreMetaColumnNameFile())) {
            hdfs.copyFromLocalFile(new Path(evalConfig.getScoreMetaColumnNameFile()),
                    new Path(pathFinder.getEvalSetPath(evalConfig)));
        }
    }
}

From source file: name.abhijitsarkar.hadoop.distributedcache.CitationRetriever.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    /*
     * Create an archive using the supplied codec name. The codec name is currently unused and the archive returned
     * is always Gzip. See the method for details.
     */
    final Path compressedFilePath = new Path(compressFile(new Path(args[2]).toUri(), "gzip", conf));

    final FileSystem fs = FileSystem.get(conf);
    final String remoteLocation = File.separator + compressedFilePath.getName();
    LOGGER.debug("Copying from: {} to {}.", compressedFilePath.toUri(), remoteLocation);
    /*
     * Copy it to the HDFS from where it is distributed to all task nodes. GenericOptionsParser, if used, does it
     * behind the scenes
     */
    fs.copyFromLocalFile(compressedFilePath, new Path(remoteLocation));

    /*
     * There are easier ways to put files in the Distributed Cache, like using the GenericOptionsParser command line
     * arguments '-archives' but that will be too easy, won't it?
     */

    /* Create a symlink */

    final String[] symlinks = new String[] { removeExtension(compressedFilePath.getName()) };
    conf.set("symlinks", StringUtils.arrayToString(symlinks));

    DistributedCache.addCacheArchive(new URI(remoteLocation + "#" + symlinks[0]), conf);
    DistributedCache.addCacheArchive(new URI(remoteLocation), conf);
    DistributedCache.createSymlink(conf);

    /*
     * Set the citation number in the configuration to be later used by the Mapper
     */
    conf.set("citationNum", args[3]);

    Job job = new Job(conf, "distributed-cache");

    job.setMapperClass(CitationMapper.class);
    job.setReducerClass(CitationReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(getClass());

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: nl.gridline.zieook.tasks.DataFileImportTaskTest.java

License: Apache License

@Test
@Ignore
public void executeMovielensImport() throws Exception {
    HBaseManager manager = HBaseManager.getInstance(Config.getInstance().getZooKeeperHost());
    new HBaseCollectionTable(manager).drop(CP);
    new HBaseCollectionTable(manager).create(CP);

    new HBaseUserTable(manager).drop(CP);
    new HBaseUserTable(manager).create(CP);

    // drop HBase tables
    // put data in HBase:
    try {

        LOG.info("copy data from local to HDFS");
        FileSystem hdfs = FileSystem.get(hdfsConfig);

        // cleanup:
        hdfs.delete(input, true);
        hdfs.delete(output, true);

        // import:
        hdfs.copyFromLocalFile(new Path("test-data-big/movies.dat"), new Path(input, "movies.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-big/ratings.dat"), new Path(input, "ratings.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-big/users.dat"), new Path(input, "users.dat"));
    } catch (IOException e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // import data into HBase:
    DataFileImportTask imp = new DataFileImportTask();
    imp.setConfig(importMovielensConfig);
    LOG.info("Create data import tasks");
    try {
        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_COLLECTION);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "movies.dat").toString());
        imp.call();
        LOG.info("COLLECTION data imported");
        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_RATINGS);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "ratings.dat").toString());
        imp.call();
        LOG.info("RATINGS data imported");
        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_USERS);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "users.dat").toString());
        imp.call();
        LOG.info("USER data imported");
    } catch (Exception e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // DataPrepareTool prepare = new DataPrepareTool();
    // prepare.set(TaskConfig.COLLECTION, COLLECTION);
    // prepare.set(TaskConfig.CP, CP);
    // prepare.configure(HBaseTableConstants.USERTABLE_NAME + CP, output.toString());
    // boolean result = prepare.execute();
    // assertTrue(result);

    // test output...

}

From source file: nl.gridline.zieook.tasks.DataImportExportTest.java

License: Apache License

@Test
@Ignore
public void recommenderTool() throws Exception {
    try {
        LOG.info("copy data from local to HDFS");
        FileSystem hdfs = FileSystem.get(hdfsConfig);
        // import:
        hdfs.copyFromLocalFile(new Path("test-data-small/movies.dat"), new Path(input, "movies.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-small/ratings.dat"), new Path(input, "ratings.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-small/users.dat"), new Path(input, "users.dat"));
    } catch (IOException e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // import data into HBase:
    DataFileImportTask imp = new DataFileImportTask();
    imp.setConfig(inxConfig);
    LOG.info("Create data import tasks");

    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_COLLECTION);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "movies.dat").toString());
    imp.call();
    LOG.info("COLLECTION data imported");
    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_RATINGS);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "ratings.dat").toString());
    imp.call();
    LOG.info("RATINGS data imported");
    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_USERS);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "users.dat").toString());
    imp.call();
    LOG.info("USER data imported");

}