Example usage for org.apache.hadoop.fs FileSystem copyToLocalFile

Introduction

On this page you can find example usage for the org.apache.hadoop.fs.FileSystem method copyToLocalFile.

Prototype

public void copyToLocalFile(Path src, Path dst) throws IOException 

Document

Copy a file from the remote filesystem to the local one.
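
Before the full examples below, here is a minimal, self-contained sketch of a call to this method. The source and destination paths are hypothetical, and the code assumes the default filesystem in the Configuration is reachable:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Filesystem backing the default configuration (e.g. HDFS on a cluster)
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/tmp/input.txt"); // hypothetical remote source
        Path dst = new Path("/tmp/local-copy.txt"); // hypothetical local destination

        // Copies src to a file on the local filesystem; on checksummed
        // filesystems a .crc sidecar file is written next to the copy
        fs.copyToLocalFile(src, dst);
    }
}

Note that this two-argument form keeps the source file; the overload copyToLocalFile(boolean delSrc, Path src, Path dst) can delete the source after the copy.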

Usage

From source file:SBP.java

License:Apache License

protected static void copyToLocalFile(Configuration conf, Path hdfs_path, Path local_path) throws Exception {
    FileSystem fs = FileSystem.get(conf);

    // read the result
    fs.copyToLocalFile(hdfs_path, local_path);
}

From source file:CountJob.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1;
    String line2;
    line1 = bfr.readLine();
    line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if ((line1 == null && line2 == null)) {
            break;
        } else {
            if (line1.equals(line2)) {
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            } else {
                same = false;
                break;
            }
        }
    }
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    } else {

        System.out.print("Different");
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);

}

From source file:azure.TweetUpload.java

License:Apache License

public static void main(String[] args) {
    try {
        String filePath = "hdfs://localhost.localdomain:8020/tmp/hive-mapred/"
                + args[0] + "/000000_0"; // File location

        Configuration configuration = new Configuration();

        Path path = new Path(filePath);
        Path newFilePath = new Path("temp_" + args[0]);
        FileSystem fs = path.getFileSystem(configuration);

        // Copy to a temporary file in the local working directory
        fs.copyToLocalFile(path, newFilePath);

        CloudStorageAccount account = CloudStorageAccount.parse(storageConnectionString);
        CloudBlobClient serviceClient = account.createCloudBlobClient();

        CloudBlobContainer container = serviceClient
                .getContainerReference("container_name_here"); // Container name (must be lower case)
        container.createIfNotExists();

        // Upload the file
        CloudBlockBlob blob = container
                .getBlockBlobReference("user/rdp_username_here/analysisFiles/"
                        + args[0] + ".tsv");
        File sourceFile = new File(newFilePath.toString());
        blob.upload(new FileInputStream(sourceFile), sourceFile.length());

        sourceFile.delete(); // Delete the temporary local copy

    } catch (Exception e) {
        // Exit with a failure status on any error
        System.exit(-1);
    }
}

From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java

License:Open Source License

/**
 * @return 0 if successful, -1 if the file size is incorrect and -2 if an exception occurred
 */
protected static int privateDownloadFileFromHDFS(TaskInputOutputContext context, FileSystem fs, String from,
        String to) {
    try {
        // check if file is present on local scratch
        File f = new File(to);
        if (!f.exists()) {
            Logger.DEBUG("attempting download of \"" + to + "\"");
            fs.copyToLocalFile(new Path(from), new Path(to));
            context.getCounter(HalvadeCounters.FIN_FROM_HDFS)
                    .increment(fs.getFileStatus(new Path(from)).getLen());
        } else {
            // check if filesize is correct
            if (fs.getFileStatus(new Path(from)).getLen() != f.length()) {
                // incorrect filesize, remove and download again
                Logger.DEBUG("incorrect filesize: " + f.length() + " =/= "
                        + fs.getFileStatus(new Path(from)).getLen());
                f.delete();
                fs.copyToLocalFile(new Path(from), new Path(to));
                context.getCounter(HalvadeCounters.FIN_FROM_HDFS)
                        .increment(fs.getFileStatus(new Path(from)).getLen());

            } else {
                Logger.DEBUG("file \"" + to + "\" exists");
            }
        }
        if (fs.getFileStatus(new Path(from)).getLen() != f.length())
            return -1;
        else
            return 0;
    } catch (IOException ex) {
        Logger.DEBUG("failed to download " + from + " from HDFS: " + ex.getLocalizedMessage());
        Logger.EXCEPTION(ex);
        return -2;
    }
}

From source file:cgl.hadoop.apps.runner.RunnerMap.java

License:Open Source License

public void map(String key, String value, Context context) throws IOException, InterruptedException {

    long startTime = System.currentTimeMillis();
    String endTime = "";

    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);

    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";

    fileNameOnly = key;
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".output";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".error";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".input";
    /**
    Write your code to get localInputFile, outFile,
    stdOutFile and stdErrFile
    **/

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");

    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;
    //Create the external process

    startTime = System.currentTimeMillis();

    Process p = Runtime.getRuntime().exec(execCommand);

    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);

    // start the stream threads.
    inputStream.start();
    errorStream.start();

    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    //Upload the results to HDFS
    startTime = System.currentTimeMillis();

    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");

}

From source file:cgl.hadoop.apps.runner.RunnerMap.java

License:Open Source License

public void map(String key, String value, Context context) throws IOException, InterruptedException {

    long startTime = System.currentTimeMillis();
    String endTime = "";

    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);

    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";

    //Custom code
    String[] tmp = value.split(File.separator);
    fileNameOnly = tmp[tmp.length - 1];
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".out";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".stdout";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".stderr";

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");

    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;
    //Create the external process

    startTime = System.currentTimeMillis();

    Process p = Runtime.getRuntime().exec(execCommand);

    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);

    // start the stream threads.
    inputStream.start();
    errorStream.start();

    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    //Upload the results to HDFS
    startTime = System.currentTimeMillis();

    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");

}

From source file:cmd.download.java

License:Apache License

private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}

From source file:com.asakusafw.compiler.util.tester.HadoopDriver.java

License:Apache License

private void copyFromHadoop(Location location, File targetDirectory) throws IOException {
    targetDirectory.mkdirs();
    logger.info("copy {} to {}", location, targetDirectory);

    Path path = new Path(location.toPath('/'));
    FileSystem fs = path.getFileSystem(configuration);
    FileStatus[] list = fs.globStatus(path);
    if (list == null) {
        throw new IOException(
                MessageFormat.format("Failed to fs -get: source={0}, destination={1}", path, targetDirectory));
    }
    for (FileStatus status : list) {
        Path p = status.getPath();
        try {
            fs.copyToLocalFile(p, new Path(new File(targetDirectory, p.getName()).toURI()));
        } catch (IOException e) {
            throw new IOException(
                    MessageFormat.format("Failed to fs -get: source={0}, destination={1}", p, targetDirectory),
                    e);
        }
    }
}

From source file:com.boozallen.cognition.ingest.storm.util.HdfsFileLoader.java

License:Apache License

public void getFileFromHdfs(String hdfsPath, File dst) throws IllegalArgumentException, IOException {
    FileSystem fileSystem = this.getHadoopFileSystem();
    Path src = new Path(hdfsPath);
    fileSystem.copyToLocalFile(src, new Path(dst.getAbsolutePath()));
}
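
A hypothetical call to this helper, given an HdfsFileLoader instance configured elsewhere (the paths are illustrative only):

// given an HdfsFileLoader instance named loader:
loader.getFileFromHdfs("/data/input/part-00000", new File("/tmp/part-00000"));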

From source file:com.cloudera.livy.rsc.driver.AddJarJob.java

License:Apache License

@Override
public Object call(JobContext jc) throws Exception {
    File localCopyDir = new File(jc.getLocalTmpDir(), "__livy__");
    synchronized (jc) {
        if (!localCopyDir.isDirectory() && !localCopyDir.mkdir()) {
            throw new IOException("Failed to create directory for downloaded jars.");
        }
    }

    URI uri = new URI(path);
    String name = uri.getFragment() != null ? uri.getFragment() : uri.getPath();
    name = new File(name).getName();
    File localCopy = new File(localCopyDir, name);

    if (localCopy.exists()) {
        throw new IOException(String.format("A file with name %s has already been uploaded.", name));
    }

    Configuration conf = jc.sc().sc().hadoopConfiguration();
    FileSystem fs = FileSystem.get(uri, conf);
    fs.copyToLocalFile(new Path(uri), new Path(localCopy.toURI()));

    MutableClassLoader cl = (MutableClassLoader) Thread.currentThread().getContextClassLoader();
    cl.addURL(localCopy.toURI().toURL());

    jc.sc().addJar(path);
    return null;
}