Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

This page collects example usages of the copyFromLocalFile method of org.apache.hadoop.fs.FileSystem.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk. It is copied into the target file system under the given dst name; the source file is kept intact.
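
As a quick orientation, here is a minimal, hedged sketch of the prototype above (not taken from the sources below; the configuration and both paths are placeholders):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();    // picks up core-site.xml / hdfs-site.xml from the classpath
        FileSystem fs = FileSystem.get(conf);        // the configured default file system, e.g. HDFS

        Path src = new Path("/tmp/data.csv");        // placeholder: file on the local disk
        Path dst = new Path("/user/demo/data.csv");  // placeholder: destination on the target file system

        fs.copyFromLocalFile(src, dst);              // copies the file; the local source is left in place
        fs.close();
    }
}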

Usage

From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java

License:Apache License

/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication value appropriately
 * to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If source
 *                  is a folder its contents will be copied into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException         Destination exists but is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}
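
A hypothetical caller for the stageForCache method above, assuming a Commons VFS (v1) FileObject for the local source; the utility instance, URL, and paths are placeholders:

import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.VFS;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StageForCacheCaller {
    public static void stagePluginFolder(DistributedCacheUtil util) throws Exception {
        // Resolve the local plugin folder through Commons VFS (placeholder URL)
        FileObject source = VFS.getManager().resolveFile("file:///opt/pentaho/plugins/my-plugin");

        // Target file system and staging destination (placeholder path)
        FileSystem fs = FileSystem.get(new Configuration());
        Path dest = new Path("/opt/pentaho/mapreduce/my-plugin");

        // overwrite = true: any previously staged copy at dest is deleted first
        util.stageForCache(source, fs, dest, true);
    }
}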

From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java

License:Apache License

/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging
 * the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
 *                  dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If
 *                  source is a folder its contents will be copied into dest. For more info see {@link
 *                  FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException         Destination exists but is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
        copyConfigProperties(source, fs, dest);
    } else {
        Path local = new Path(source.getURL().getPath());
        fs.copyFromLocalFile(local, dest);
    }

    if (isPublic) {
        fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
    } else {
        fs.setPermission(dest, CACHED_FILE_PERMISSION);
    }
    fs.setReplication(dest, replication);
}

From source file:org.springframework.data.hadoop.mapreduce.StreamingTest.java

License:Apache License

private void cleanOutput(ApplicationContext ctx) throws Exception {
    FileSystem fs = FileSystem.get(ctx.getBean(Configuration.class));
    fs.copyFromLocalFile(new Path("../build.gradle"), new Path("test/"));
    fs.delete(new Path("output"), true);
}

From source file:org.starschema.hadoop.yarn.applications.distributedshell.Client.java

License:Apache License

private void addToLocalResourcesCompressed(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}
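
In distributed-shell style clients, the populated localResources map is later attached to a container launch context; the following is a hedged sketch of that step (the class and method names below are illustrative, not part of the source above):

import java.util.Map;

import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.util.Records;

public class LaunchContextSketch {
    public static ContainerLaunchContext buildContext(Map<String, LocalResource> localResources) {
        ContainerLaunchContext containerContext = Records.newRecord(ContainerLaunchContext.class);
        // YARN localizes each entry on the node before the container starts;
        // ARCHIVE resources (as above) are unpacked under the link name used as the map key.
        containerContext.setLocalResources(localResources);
        return containerContext;
    }
}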

From source file:org.terrier.utility.io.HadoopUtility.java

License:Mozilla Public License

protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    logger.info("Copying classpath to job");
    if (jobConf.getBoolean("terrier.classpath.copied", false)) {
        return;
    }
    jobConf.setBoolean("terrier.classpath.copied", true);
    final String[] jars = findJarFiles(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem defFS = FileSystem.get(jobConf);
    for (String jarFile : jars) {
        //logger.debug("Adding " + jarFile + " to job class path");
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeTemporaryFile(jobConf, filename);
        defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}

From source file:org.trommel.trommel.mapreduce.TrommelDriver.java

License:Apache License

private static int processScript(Level logLevel, int numOfReducers, String trommelScriptFilePath)
        throws Exception {
    int exitCode = 0;
    FrontEndInterpreter frontEndInterpreter = null;
    FileSystem fileSystem = null;
    Path cachedScript = null;

    logger.setLevel(logLevel);

    try {
        logger.info(String.format("Loading and parsing TrommelScript file %1$s ...", trommelScriptFilePath));
        Lexer lexer = new Lexer(
                new PushbackReader(new BufferedReader(new FileReader(trommelScriptFilePath)), 4096));
        Parser parser = new Parser(lexer);
        Start ast = parser.parse();
        ValidationInterpreter validationInterpreter = new ValidationInterpreter();

        logger.info(String.format("Validating TrommelScript %1$s...", trommelScriptFilePath));
        ast.apply(validationInterpreter);

        if (validationInterpreter.getSemanticErrors().size() != 0) {
            // Validation of script failed
            logger.info(String.format("TrommelScript %1$s failed validation with the following errors:",
                    trommelScriptFilePath));

            for (String errorMessage : validationInterpreter.getSemanticErrors()) {
                logger.info(errorMessage);
            }

            return exitCode;
        }

        logger.info(String.format("Interpreting TrommelScript %1$s...", trommelScriptFilePath));
        frontEndInterpreter = new FrontEndInterpreter(logger, DEFAULT_HDFS_PATH);
        ast.apply(frontEndInterpreter);

        logger.debug("Creating Job object");
        Job job = new Job();

        job.setJarByClass(TrommelDriver.class);

        // Copy TrommelScript file from local file system to HDFS and added to distributed cache
        fileSystem = FileSystem.get(job.getConfiguration());
        Path src = new Path(trommelScriptFilePath);
        cachedScript = new Path(String.format("/tmp/%1$s_%2$s", src.getName(), UUID.randomUUID().toString()));

        fileSystem.copyFromLocalFile(src, cachedScript);
        logger.debug(String.format("Moved TrommelScript file to HDFS as %1$s.", cachedScript.toString()));

        logger.debug("Adding TrommelScript file to DistributedCache.");
        DistributedCache.addCacheFile(new URI(cachedScript.toString()), job.getConfiguration());

        logger.debug(String.format("Setting LOGGING_LEVEL_CONFIG_PROP to %1$s", logLevel.toString()));
        job.getConfiguration().set(LOGGING_LEVEL_CONFIG_PROP, logLevel.toString());

        // Specify HDFS input/output locations
        logger.debug(String.format("Calling FileInputFormat.addInputPath() with %1$s.",
                frontEndInterpreter.getHdfsInputFilePath()));
        FileInputFormat.addInputPath(job, new Path(frontEndInterpreter.getHdfsInputFilePath()));

        logger.debug(String.format("Calling FileOutputFormat.setOutputPath() with %1$s.",
                frontEndInterpreter.getHdfsOutputFilePath()));
        FileOutputFormat.setOutputPath(job, new Path(frontEndInterpreter.getHdfsOutputFilePath()));

        // Hadoop setup
        job.setMapperClass(TrommelMapper.class);

        if (frontEndInterpreter.samplingData()) {
            logger.debug("Trommel is sampling data, 0 Reducers set.");
            job.setNumReduceTasks(0);
        } else {
            logger.debug(String.format("Setting number of Reducers to %1$s.", numOfReducers));
            job.setReducerClass(TrommelReducer.class);
            job.setNumReduceTasks(numOfReducers);
        }

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        logger.debug("Running job");
        if (!job.waitForCompletion(true)) {
            exitCode = 1;
        } else if (frontEndInterpreter.getLocalFilePath() != null) {
            // User would like data exported to local file system
            logger.debug(String.format("Exporting Trommel output from %1$s to %2$s.",
                    frontEndInterpreter.getHdfsOutputFilePath(), frontEndInterpreter.getLocalFilePath()));
            Path mergeFilePath = new Path(String.format("/tmp/%1$s", UUID.randomUUID()));
            FSDataOutputStream mergeFileStream = fileSystem.create(mergeFilePath);
            Path localFilePath = new Path(frontEndInterpreter.getLocalFilePath());
            FileStatus[] outputFileStatuses = fileSystem
                    .listStatus(new Path(frontEndInterpreter.getHdfsOutputFilePath()));
            FSDataInputStream outputFileStream = null;
            String fileNameFilter = (frontEndInterpreter.samplingData() ? "part-m" : "part-r");

            try {
                // Loop through the output, merging any reducer output file for export to local file system
                for (FileStatus outputFileStatus : outputFileStatuses) {
                    if (!outputFileStatus.isDir()
                            && outputFileStatus.getPath().getName().contains(fileNameFilter)) {
                        logger.debug(String.format("Merging file %1$s into local file system output.",
                                outputFileStatus.getPath().toString()));

                        outputFileStream = fileSystem.open(outputFileStatus.getPath());
                        byte[] buffer = new byte[(int) outputFileStatus.getLen()];

                        // readFully: a single read() call may return fewer bytes than requested
                        outputFileStream.readFully(buffer);

                        mergeFileStream.write(buffer);

                        outputFileStream.close();
                    }
                }
            } finally {
                if (mergeFileStream != null) {
                    mergeFileStream.close();
                    fileSystem.copyToLocalFile(mergeFilePath, localFilePath);
                    fileSystem.delete(mergeFilePath, true);
                }
            }
        }
    } finally {
        try {
            if (fileSystem != null) {
                // Clean up any temp files if needed
                if (frontEndInterpreter.getHdfsOutputFilePath().equals(DEFAULT_HDFS_PATH)) {
                    logger.debug(String.format("Deleting temp files from /tmp/Trommel"));
                    fileSystem.delete(new Path(DEFAULT_HDFS_PATH), true);
                }

                // Clean up the cached file
                logger.debug(String.format("Deleting cached TrommelScript file %1$s", cachedScript.toString()));
                fileSystem.delete(cachedScript, true);
            }
        } catch (IOException ioe) {
            // Couldn't delete file for some reason, alert user
            logger.error(String.format(
                    "Exception encountered deleting cached TrommelScript file %1$s. Error message: %2$s",
                    cachedScript.toString(), ioe.getMessage()));
        }
    }

    return exitCode;
}

From source file:org.unigram.likelike.lsh.LSHRecommendations.java

License:Apache License

/**
 * Save keys.
 * @param keys hash keys
 * @param inputFile input file
 * @param conf configuration
 * @throws IOException -
 */
private void saveKeys(final String keys, final String inputFile, final Configuration conf) throws IOException {
    /* save to local fs */
    String tempKeyFile = "keys.tmp";
    try {
        FileOutputStream fos = new FileOutputStream(tempKeyFile);
        OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8");
        BufferedWriter bw = new BufferedWriter(osw);
        bw.write(keys + "\n");
        bw.close();
        osw.close();
        fos.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /* put local file to hdfs */
    FileSystem fs = FileSystem.get(conf);
    Path localKeyFilePath = new Path(tempKeyFile);
    Path hdfsKeyFilePath = new Path(inputFile + ".keys");
    fs.copyFromLocalFile(localKeyFilePath, hdfsKeyFilePath);

    /* remove local file*/
    fs.delete(localKeyFilePath, true);

    return;
}
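
A hedged alternative sketch (not part of the source above): FileSystem also offers a copyFromLocalFile overload with a delSrc flag, which would combine the copy and the cleanup of the local temporary file in one call; the configuration and file names are placeholders:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SaveKeysAlternative {
    public static void putKeys(Configuration conf, String inputFile) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        // delSrc = true removes the local "keys.tmp" once the copy has completed
        fs.copyFromLocalFile(true, new Path("keys.tmp"), new Path(inputFile + ".keys"));
    }
}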

From source file:org.warcbase.index.IndexerRunner.java

License:Apache License

private void cacheSolrHome(JobConf conf, String solrHomeZipName) throws IOException {
    File tmpSolrHomeDir = new File("src/main/solr").getAbsoluteFile();

    // Create a ZIP file.
    File solrHomeLocalZip = File.createTempFile("tmp-", solrHomeZipName);
    Zipper.zipDir(tmpSolrHomeDir, solrHomeLocalZip);

    // Add to HDFS.
    FileSystem fs = FileSystem.get(conf);
    String hdfsSolrHomeDir = fs.getHomeDirectory() + "/solr/tempHome/" + solrHomeZipName;
    fs.copyFromLocalFile(new Path(solrHomeLocalZip.toString()), new Path(hdfsSolrHomeDir));

    final URI baseZipUrl = fs.getUri().resolve(hdfsSolrHomeDir + '#' + solrHomeZipName);

    // Cache it.
    DistributedCache.addCacheArchive(baseZipUrl, conf);
}

From source file:oz.hadoop.yarn.api.core.ApplicationMasterLauncherImpl.java

License:Apache License

/**
 *
 */
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId,
        Map<String, LocalResource> localResources) {
    String suffix = this.applicationName + "_master/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);

    try {
        Path sourcePath = new Path(fileSrcPath);
        if (logger.isDebugEnabled()) {
            logger.debug("Copying '" + sourcePath + "' to " + dst);
        }
        fs.copyFromLocalFile(sourcePath, dst);
        FileStatus scFileStatus = fs.getFileStatus(dst);
        LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
                scFileStatus.getModificationTime());
        localResources.put(fileDstPath, scRsrc);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to communicate with FileSystem: " + fs, e);
    }
}

From source file:pl.edu.icm.coansys.heeut.TestMapReduce.java

License:Apache License

@Test(timeout = 1800000)
public void testWordCount() throws Exception {

    String prefix = getCurrentDateAppended("wordcount");
    String inputDirName = prefix + "-input";
    String outputDirName = prefix + "-output";

    FileSystem dfs = UTIL.getDFSCluster().getFileSystem();
    Path inputDir = new Path(inputDirName);
    Path qualifiedInputDir = dfs.makeQualified(inputDir);

    dfs.copyFromLocalFile(new Path("src/test/resource/input/wordcount/apache_projects.dat"), qualifiedInputDir);
    ToolRunner.run(UTIL.getConfiguration(), new WordCount(), new String[] { inputDirName, outputDirName });

    InputStream contentStream = dfs.open(new Path(outputDirName + "/part-00000"));
    BufferedReader contentReader = new BufferedReader(new InputStreamReader(contentStream));
    Assert.assertEquals("Apache\t3", contentReader.readLine());
    Assert.assertEquals("HBase\t1", contentReader.readLine());
    Assert.assertEquals("Hadoop\t1", contentReader.readLine());
    Assert.assertEquals("Pig\t1", contentReader.readLine());

    Assert.assertNull(contentReader.readLine());
    contentReader.close();
}