Example usage for org.apache.hadoop.fs FileSystem copyToLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyToLocalFile

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem copyToLocalFile.

Prototype

public void copyToLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

Copy a file from the remote filesystem to the local one.

Usage

From source file:edu.uci.ics.hyracks.hdfs2.dataflow.DataflowTest.java

License:Apache License

/**
 * Check if the results are correct/*from  ww w .j  av  a 2  s . c om*/
 * 
 * @return true if correct
 * @throws Exception
 */
private boolean checkResults() throws Exception {
    FileSystem dfs = FileSystem.get(conf.getConfiguration());
    Path result = new Path(HDFS_OUTPUT_PATH);
    Path actual = new Path(ACTUAL_RESULT_DIR);
    dfs.copyToLocalFile(result, actual);

    TestUtils.compareWithResult(new File(EXPECTED_RESULT_PATH + File.separator + "part-0"),
            new File(ACTUAL_RESULT_DIR + File.separator + "customer_result" + File.separator + "part-0"));
    return true;
}

From source file:eu.edisonproject.classification.tfidf.mapreduce.TFIDFDriverImpl.java

License:Apache License

/**
 * Runs the chain of TF-IDF jobs (word frequency, word counts, TF-IDF, competence
 * distances) and copies the final job output from the Hadoop file system into
 * the local {@code OUT} directory, deleting the remote output afterwards.
 *
 * <p>Any failure is caught and logged at SEVERE level; nothing is propagated to the caller.
 *
 * @param inputPath directory whose {@code .txt} files are the documents to process
 */
public void executeTFIDF(String inputPath) {
    final Logger log = Logger.getLogger(TFIDFDriverImpl.class.getName());
    try {
        File items = new File(INPUT_ITEMSET);
        if (!items.exists()) {
            throw new IOException(items.getAbsoluteFile() + " not found");
        }

        String outputPath1 = System.currentTimeMillis() + "_" + UUID.randomUUID()
                + "-TFIDFDriverImpl-1-word-freq";

        // The size of the item set decides which word-frequency job is run.
        if (items.length() < 200000000) {
            String avroFile = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-avro";
            log.log(Level.INFO, "Starting text2Avro");
            text2Avro(inputPath, avroFile);

            log.log(Level.INFO, "Starting WordFrequencyInDocDriver: {0},{1},{2},{3},{4}",
                    new Object[] { avroFile, outputPath1, INPUT_ITEMSET, NUM_OF_LINES, STOPWORDS_PATH });
            String[] args1 = { avroFile, outputPath1, INPUT_ITEMSET, STOPWORDS_PATH };
            ToolRunner.run(new WordFrequencyInDocDriver(), args1);
        } else {
            log.log(Level.INFO, "Starting TermWordFrequency");
            String[] args1 = { INPUT_ITEMSET, outputPath1, inputPath, STOPWORDS_PATH, NUM_OF_LINES };
            ToolRunner.run(new TermWordFrequency(), args1);
        }

        String outputPath2 = System.currentTimeMillis() + "_" + UUID.randomUUID()
                + "-TFIDFDriverImpl-2-word-counts";
        String[] args2 = { outputPath1, outputPath2 };
        ToolRunner.run(new WordCountsForDocsDriver(), args2);

        File docs = new File(inputPath);
        File[] files = docs.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".txt");
            }
        });
        log.log(Level.INFO, "docs:{0}", docs.getAbsolutePath());
        // listFiles returns null when the path is not a directory or cannot be read;
        // fail with a descriptive message instead of a bare NullPointerException.
        if (files == null) {
            throw new IOException(docs.getAbsolutePath() + " is not a readable directory");
        }
        int numberOfDocuments = files.length;
        String outputPath3 = System.currentTimeMillis() + "_" + UUID.randomUUID()
                + "-TFIDFDriverImpl-3-tf-idf";
        String[] args3 = { outputPath2, outputPath3, String.valueOf(numberOfDocuments) };
        ToolRunner.run(new WordsInCorpusTFIDFDriver(), args3);

        // Build a comma-separated list of document names (extension and underscores removed).
        // NOTE(review): this check is case-sensitive while the filter above is not, so a
        // file such as "A.TXT" is counted in numberOfDocuments but left out of this list.
        StringBuilder fileNames = new StringBuilder();
        String prefix = "";
        for (File name : files) {
            if (name.isFile() && FilenameUtils.getExtension(name.getName()).endsWith("txt")) {
                fileNames.append(prefix);
                prefix = ",";
                fileNames.append(FilenameUtils.removeExtension(name.getName()).replaceAll("_", ""));
            }
        }
        String outputPath4 = System.currentTimeMillis() + "_" + UUID.randomUUID()
                + "-TFIDFDriverImpl-4-distances";
        String[] args4 = { outputPath3, outputPath4, COMPETENCES_PATH, fileNames.toString() };
        log.log(Level.INFO, "args4:{0}", Arrays.toString(args4));
        ToolRunner.run(new CompetencesDistanceDriver(), args4);

        // Copy every entry of the final job output to the local OUT directory,
        // then remove the output from the Hadoop file system.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path hdfsRes = new Path(outputPath4);
        FileStatus[] results = fs.listStatus(hdfsRes);
        for (FileStatus s : results) {
            Path dest = new Path(OUT + "/" + s.getPath().getName());
            log.log(Level.INFO, "Copy: {0} to: {1}", new Object[] { s.getPath(), dest });
            fs.copyToLocalFile(s.getPath(), dest);
        }
        fs.delete(hdfsRes, true);

    } catch (Exception ex) {
        log.log(Level.SEVERE, null, ex);
    }

}

From source file:eu.scape_project.tb.chutney.Tools.java

License:Apache License

/**
 * Copy an input file to a local temporary file and return the new local filename
 * @param pTmpDir temporary directory to copy files to
 * @param pFs The HDFS filesystem/*from   w w w.j a  v a 2  s. c o m*/
 * @param pInputFile The URL/name of the input file
 * @return A File instance for the new local temporary file
 * @throws IOException file access error
 */
public static File copyInputToLocalTemp(File pTmpDir, FileSystem pFs, String pInputFile) throws IOException {

    //put the file in the new temporary directory
    //we use lastindexof as path may start hdfs:// and File doesn't understand
    System.out.println(pTmpDir + "<-" + pInputFile);
    String tempFile = pTmpDir.getAbsolutePath();
    if (pInputFile.contains("/")) {
        tempFile += (pInputFile.substring(pInputFile.lastIndexOf("/")));
    } else {
        tempFile += "/" + pInputFile;
    }
    File tempInputFile = new File(tempFile);

    //if this file has already been copied - skip
    //FIXME: this thinks that the file exists when it doesn't
    if (tempInputFile.exists())
        return tempInputFile;

    //i.e. this file is a local file
    if (new File(pInputFile).exists()) {
        //   System.out.println("copying from local fs");
        FileInputStream fis = new FileInputStream(pInputFile);
        FileOutputStream fos = new FileOutputStream(tempInputFile);
        byte[] buffer = new byte[Settings.BUFSIZE];
        int bytesRead = 0;
        while (fis.available() > 0) {
            bytesRead = fis.read(buffer);
            fos.write(buffer, 0, bytesRead);
        }
        fis.close();
        fos.close();
        return tempInputFile;
    }
    //this file is in HDFS
    if (pFs.exists(new Path(pInputFile))) {
        //   System.out.println("copying from hdfs");
        pFs.copyToLocalFile(new Path(pInputFile), new Path(tempFile));
        tempInputFile = new File(tempFile);
        return tempInputFile;
    }
    //TODO: check for HTTP files etc

    System.out.println("file not found");
    return null;
}

From source file:gobblin.source.extractor.extract.google.GoogleCommon.java

License:Apache License

/**
 * Copies the file at {@code keyPath} from {@code fs} into a newly created local
 * temporary file that is readable, writable and executable by the owner only
 * and is registered for deletion when the JVM exits.
 *
 * @param fs file system that holds the key file
 * @param keyPath location of the key file inside {@code fs}
 * @return the local temporary copy
 * @throws IOException if the temporary file cannot be created or the copy fails
 */
private static File copyToLocal(FileSystem fs, Path keyPath) throws IOException {
    final File localKey = Files.createTempFile(
            GoogleCommon.class.getSimpleName(),
            "tmp",
            PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")))
            .toFile();
    localKey.deleteOnExit();

    final Path localKeyPath = new Path(localKey.getAbsolutePath());
    fs.copyToLocalFile(keyPath, localKeyPath);
    return localKey;
}

From source file:io.apigee.lembos.utils.RunnerUtils.java

License:Apache License

/**
 * Takes a module path, which could be a local filesystem path or a url, and returns the local path to the module.
 *
 * <b>Note:</b> If the value is a URL, the URL will be downloaded locally to create the necessary filesystem
 * location for the Node.js module to allow for archiving and adding to DistributedCache.
 *
 * @param conf the Hadoop configuration/*from  w ww.  j a v a 2 s  .  c  o m*/
 *
 * @return the local filesystem path to the module
 *
 * @throws IOException if anything goes wrong
 */
public static File createLocalCopyOfModule(final Configuration conf) throws IOException {
    final String moduleName = conf.get(LembosConstants.MR_MODULE_NAME);
    final String modulePath = conf.get(LembosConstants.MR_MODULE_PATH);
    File localTempModule = null;

    if (moduleName != null && !moduleName.trim().isEmpty() && modulePath != null
            && !modulePath.trim().isEmpty()) {
        URL moduleUrl;

        // Test if this is a URL or a file
        try {
            moduleUrl = new URL(modulePath);
        } catch (MalformedURLException e) {
            // This is to be expected if the configuration path is not a URL
            moduleUrl = null;
        }

        // Create a local temporary directory to contain the Node.js module
        final java.nio.file.Path tmpDir = Files.createTempDirectory("LembosMapReduceModule");
        FileSystem fs;

        // Delete the temp directory
        tmpDir.toFile().deleteOnExit();

        // Create the proper FileSystem
        if (moduleUrl == null) {
            fs = FileSystem.getLocal(conf);
        } else {
            try {
                fs = FileSystem.get(moduleUrl.toURI(), conf);
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
        }

        final org.apache.hadoop.fs.Path pathObj = new org.apache.hadoop.fs.Path(modulePath);

        if (fs.exists(pathObj)) {
            final org.apache.hadoop.fs.Path tmpPathObj = new org.apache.hadoop.fs.Path(
                    tmpDir.toAbsolutePath().toString());

            // Copy the local/remote file(s) to the temporary directory
            fs.copyToLocalFile(pathObj, tmpPathObj);

            final File moduleFile = new File(
                    new org.apache.hadoop.fs.Path(tmpPathObj, pathObj.getName()).toString());

            // Set the MapReduce module path accordingly
            if (moduleFile.isFile()) {
                final String fileName = moduleFile.getName();
                boolean wasArchive = false;

                if (fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
                    FileUtil.unTar(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                } else if (fileName.endsWith(".zip")) {
                    FileUtil.unZip(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                }

                if (wasArchive) {
                    for (final String extension : KNOWN_NODE_MODULE_EXTENSIONS) {
                        final File potentialModuleFile = new File(tmpDir.toFile(), moduleName + extension);

                        if (potentialModuleFile.exists()) {
                            localTempModule = potentialModuleFile;
                            break;
                        }
                    }
                } else {
                    localTempModule = moduleFile;
                }
            } else {
                localTempModule = new File(tmpDir.toFile(), moduleName);
            }
        } else {
            throw new RuntimeException("Unable to create/locate Node.js module locally: " + modulePath);
        }
    }

    if (localTempModule == null) {
        throw new RuntimeException("Unable to create local copy of Node.js module from path: "
                + conf.get(LembosConstants.MR_MODULE_PATH));
    }

    return localTempModule;
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPuller.java

License:Apache License

/**
 * Fetches the segment stored at {@code path} into {@code outDir}.
 *
 * <ul>
 * <li>Directory: every non-directory child is copied with {@code copyToLocalFile};
 * the whole listing-and-copy step is retried according to {@code shouldRetryPredicate()}.</li>
 * <li>Zip file: unzipped into {@code outDir}.</li>
 * <li>Gzip file: gunzipped to a file in {@code outDir} named after the gz base name.</li>
 * </ul>
 *
 * @param path location of the segment (directory, zip file or gzip file)
 * @param outDir local directory that receives the files
 * @return the result describing the files written locally
 * @throws SegmentLoadingException if the path has an unsupported type or an I/O error occurs
 */
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
        throws SegmentLoadingException {
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // --------    directory     ---------

            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }

                        // Non-recursive listing: only direct children are considered
                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);

                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                                outDir.getAbsolutePath());
                        return result;
                    }

                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // --------    zip     ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // --------    gzip     ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}

From source file:it.isislab.sof.core.engine.hadoop.mapreduce.generic.SOFMapperGeneric.java

License:Apache License

/**
 * Copies the simulation program from the Hadoop file system into the local
 * {@code tmpName} directory and runs a generic simulation on the input line.
 *
 * <p>Every failure is caught and only printed to standard error.
 *
 * @param key input key (not used by this method)
 * @param value input line handed to the simulation
 * @param output collector passed on to the simulation
 * @param reporter progress reporter (not used by this method)
 * @throws IOException declared by the mapper contract
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        final File workDir = new File(tmpName);
        workDir.mkdir();

        FileSystem.get(conf).copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));

        final SimulationGeneric simulation = new SimulationGeneric();
        // Last path segment of the program location, i.e. its file name
        final int nameStart = SIM_PROGRAM.lastIndexOf("/") + 1;
        final String programName = SIM_PROGRAM.substring(nameStart);
        simulation.run(tmpName + "/" + programName, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);

        // NOTE(review): File.delete() only removes an empty directory, and this line is
        // skipped when the simulation throws - confirm the intended cleanup.
        workDir.delete();
    } catch (Throwable failure) {
        failure.printStackTrace();
    }
}

From source file:it.isislab.sof.core.engine.hadoop.mapreduce.generic.SOFReducerGeneric.java

License:Apache License

/**
 * When {@code ISLOOP} is set, fetches the rating program and the simulation output
 * named by {@code key} from the Hadoop file system and runs the evaluation on it.
 * Does nothing beyond obtaining the file system when {@code ISLOOP} is not set.
 *
 * @param key path of the simulation output file
 * @param values values for the key (not used by this method)
 * @param output output collector (not used by this method)
 * @param reporter progress reporter (not used by this method)
 * @throws IOException if a file cannot be copied to the local file system
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    // The local copy of the rating program is named after the current thread
    final String evaluationProgram = "evaluation" + Thread.currentThread().getId();
    final FileSystem fs = FileSystem.get(conf);

    if (!ISLOOP) {
        return;
    }

    final Path localProgram = new Path(evaluationProgram);
    fs.copyToLocalFile(new Path(RATING_PROGRAM), localProgram);
    try {
        fs.mkdirs(new Path(this.RATING_PATH));
    } catch (Exception ignored) {
        // best effort: a failure to create the rating directory is deliberately ignored
    }

    final String keyText = key.toString();
    final Random random = new Random(System.currentTimeMillis());
    final String id = MD5(keyText + random.nextDouble());
    final String tmpEvalXml = "tmpEval" + id + ".xml";

    final Path localXml = new Path(tmpEvalXml);
    final Path remoteXml = new Path(keyText);
    fs.copyToLocalFile(remoteXml, localXml);

    final String xmlOutput = keyText.substring(keyText.lastIndexOf("/") + 1);
    generateEvaluation(tmpEvalXml, xmlOutput, evaluationProgram);

    // Remove the local copy of the rating program from the working directory
    new File(System.getProperty("user.dir") + "/" + evaluationProgram).delete();
}

From source file:it.isislab.sof.core.engine.hadoop.mapreduce.mason.SOFMapperMason.java

License:Apache License

/**
 * Copies the simulation program from the Hadoop file system into the local
 * {@code tmpName} directory and runs a MASON simulation on the input line.
 *
 * <p>Every failure is caught and only printed to standard error.
 *
 * @param key input key (not used by this method)
 * @param value input line handed to the simulation
 * @param output collector passed on to the simulation
 * @param reporter progress reporter (not used by this method)
 * @throws IOException declared by the mapper contract
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        final File workDir = new File(tmpName);
        workDir.mkdir();

        FileSystem.get(conf).copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));

        final SimulationMASON simulation = new SimulationMASON();
        // Last path segment of the program location, i.e. its file name
        final int nameStart = SIM_PROGRAM.lastIndexOf("/") + 1;
        final String programName = SIM_PROGRAM.substring(nameStart);
        simulation.run(tmpName + "/" + programName, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);

        // NOTE(review): File.delete() only removes an empty directory, and this line is
        // skipped when the simulation throws - confirm the intended cleanup.
        workDir.delete();
    } catch (Throwable failure) {
        failure.printStackTrace();
    }
}

From source file:it.isislab.sof.core.engine.hadoop.mapreduce.netlogo.SOFMapperNetLogo.java

License:Apache License

/**
 * Copies the simulation program from the Hadoop file system into the local
 * {@code tmpName} directory and runs a NetLogo simulation on the input line.
 *
 * <p>Every failure is caught and only printed to standard error.
 *
 * @param key input key (not used by this method)
 * @param value input line handed to the simulation
 * @param output collector passed on to the simulation
 * @param reporter progress reporter (not used by this method)
 * @throws IOException declared by the mapper contract
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try {

        (new File(tmpName)).mkdir();

        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));
        // "+ 1" drops the separator itself: without it the local path became
        // tmpName + "//" + name, and a program path without any "/" made substring(-1) throw.
        String execName = SIM_PROGRAM.substring(SIM_PROGRAM.lastIndexOf("/") + 1);
        SimulationNETLOGO netlogosim = new SimulationNETLOGO();
        netlogosim.run(tmpName + "/" + execName, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);
        // NOTE(review): File.delete() only removes an empty directory - confirm the intended cleanup.
        (new File(tmpName)).delete();

    } catch (Throwable e) {
        e.printStackTrace();
    }

}