List of usage examples for org.apache.hadoop.fs.FileSystem.copyToLocalFile
public void copyToLocalFile(Path src, Path dst) throws IOException
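Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. It assumes a standard Hadoop 2.x+ client on the classpath; the paths and class name are placeholders, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves the FileSystem from fs.defaultFS (e.g. an hdfs:// URI)
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/data/input/part-r-00000"); // HDFS source (placeholder)
        Path dst = new Path("/tmp/part-r-00000");        // local destination (placeholder)

        // Basic form: copies src to the local file system; the source is kept.
        fs.copyToLocalFile(src, dst);

        // Four-argument overload: delSrc = false keeps the source;
        // useRawLocalFileSystem = true skips the local .crc checksum file
        // that the checksummed LocalFileSystem would otherwise create.
        fs.copyToLocalFile(false, src, dst, true);

        fs.close();
    }
}

Note that the destination of copyToLocalFile is always the local file system, even though it is expressed as a Path.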
From source file:SBP.java
License:Apache License
protected static void copyToLocalFile(Configuration conf, Path hdfs_path, Path local_path) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    // read the result
    fs.copyToLocalFile(hdfs_path, local_path);
}
From source file:CountJob.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);
    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1 = bfr.readLine();
    String line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else {
            if (line1.equals(line2)) {
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            } else {
                same = false;
                break;
            }
        }
    }
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    } else {
        System.out.print("Different");
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);
}
From source file:azure.TweetUpload.java
License:Apache License
public static void main(String[] args) {
    try {
        // File location on HDFS
        String filePath = "hdfs://localhost.localdomain:8020/tmp/hive-mapred/" + args[0] + "/000000_0";
        Configuration configuration = new Configuration();
        Path path = new Path(filePath);
        Path newFilePath = new Path("temp_" + args[0]);
        FileSystem fs = path.getFileSystem(configuration);
        // Copy to a temporary file in the local directory
        fs.copyToLocalFile(path, newFilePath);

        CloudStorageAccount account = CloudStorageAccount.parse(storageConnectionString);
        CloudBlobClient serviceClient = account.createCloudBlobClient();
        // Container name (must be lower case)
        CloudBlobContainer container = serviceClient.getContainerReference("container_name_here");
        container.createIfNotExists();

        // Upload the file
        CloudBlockBlob blob = container
                .getBlockBlobReference("user/rdp_username_here/analysisFiles/" + args[0] + ".tsv");
        File sourceFile = new File(newFilePath.toString());
        blob.upload(new FileInputStream(sourceFile), sourceFile.length());

        // Delete the temporary file
        File tmpFile = new File(newFilePath.toString());
        tmpFile.delete();
    } catch (Exception e) {
        // In case of errors
        System.exit(-1);
    }
}
From source file:be.ugent.intec.halvade.utils.HalvadeFileUtils.java
License:Open Source License
/**
 * @return 0 if successful, -1 if the file size is incorrect and -2 if an exception occurred
 */
protected static int privateDownloadFileFromHDFS(TaskInputOutputContext context, FileSystem fs, String from,
        String to) {
    try {
        // check if file is present on local scratch
        File f = new File(to);
        if (!f.exists()) {
            Logger.DEBUG("attempting download of \"" + to + "\"");
            fs.copyToLocalFile(new Path(from), new Path(to));
            context.getCounter(HalvadeCounters.FIN_FROM_HDFS)
                    .increment(fs.getFileStatus(new Path(from)).getLen());
        } else {
            // check if filesize is correct
            if (fs.getFileStatus(new Path(from)).getLen() != f.length()) {
                // incorrect filesize, remove and download again
                Logger.DEBUG("incorrect filesize: " + f.length() + " =/= "
                        + fs.getFileStatus(new Path(from)).getLen());
                f.delete();
                fs.copyToLocalFile(new Path(from), new Path(to));
                context.getCounter(HalvadeCounters.FIN_FROM_HDFS)
                        .increment(fs.getFileStatus(new Path(from)).getLen());
            } else {
                Logger.DEBUG("file \"" + to + "\" exists");
            }
        }
        if (fs.getFileStatus(new Path(from)).getLen() != f.length())
            return -1;
        else
            return 0;
    } catch (IOException ex) {
        Logger.DEBUG("failed to download " + from + " from HDFS: " + ex.getLocalizedMessage());
        Logger.EXCEPTION(ex);
        return -2;
    }
}
From source file:cgl.hadoop.apps.runner.RunnerMap.java
License:Open Source License
public void map(String key, String value, Context context) throws IOException, InterruptedException {
    long startTime = System.currentTimeMillis();
    String endTime = "";
    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);
    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";
    fileNameOnly = key;
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".output";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".error";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".input";
    /** Write your code to get localInputFile, outFile, stdOutFile and stdErrFile **/

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");
    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;

    // Create the external process
    startTime = System.currentTimeMillis();
    Process p = Runtime.getRuntime().exec(execCommand);
    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);
    // start the stream threads.
    inputStream.start();
    errorStream.start();
    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    // Upload the results to HDFS
    startTime = System.currentTimeMillis();
    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");
}
From source file:cgl.hadoop.apps.runner.RunnerMap.java
License:Open Source License
public void map(String key, String value, Context context) throws IOException, InterruptedException {
    long startTime = System.currentTimeMillis();
    String endTime = "";
    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);
    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";

    // Custom code: extract the file name from the full path
    String[] tmp = value.split(File.separator);
    fileNameOnly = tmp[tmp.length - 1];
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".out";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".stdout";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".stderr";

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");
    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;

    // Create the external process
    startTime = System.currentTimeMillis();
    Process p = Runtime.getRuntime().exec(execCommand);
    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);
    // start the stream threads.
    inputStream.start();
    errorStream.start();
    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    // Upload the results to HDFS
    startTime = System.currentTimeMillis();
    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");
}
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}
From source file:com.asakusafw.compiler.util.tester.HadoopDriver.java
License:Apache License
private void copyFromHadoop(Location location, File targetDirectory) throws IOException {
    targetDirectory.mkdirs();
    logger.info("copy {} to {}", location, targetDirectory);
    Path path = new Path(location.toPath('/'));
    FileSystem fs = path.getFileSystem(configuration);
    FileStatus[] list = fs.globStatus(path);
    if (list == null) {
        throw new IOException(
                MessageFormat.format("Failed to fs -get: source={0}, destination={1}", path, targetDirectory));
    }
    for (FileStatus status : list) {
        Path p = status.getPath();
        try {
            fs.copyToLocalFile(p, new Path(new File(targetDirectory, p.getName()).toURI()));
        } catch (IOException e) {
            throw new IOException(
                    MessageFormat.format("Failed to fs -get: source={0}, destination={1}", p, targetDirectory),
                    e);
        }
    }
}
From source file:com.boozallen.cognition.ingest.storm.util.HdfsFileLoader.java
License:Apache License
public void getFileFromHdfs(String hdfsPath, File dst) throws IllegalArgumentException, IOException {
    FileSystem fileSystem = this.getHadoopFileSystem();
    Path src = new Path(hdfsPath);
    fileSystem.copyToLocalFile(src, new Path(dst.getAbsolutePath()));
}
From source file:com.cloudera.livy.rsc.driver.AddJarJob.java
License:Apache License
@Override
public Object call(JobContext jc) throws Exception {
    File localCopyDir = new File(jc.getLocalTmpDir(), "__livy__");
    synchronized (jc) {
        if (!localCopyDir.isDirectory() && !localCopyDir.mkdir()) {
            throw new IOException("Failed to create directory for downloaded jars.");
        }
    }
    URI uri = new URI(path);
    String name = uri.getFragment() != null ? uri.getFragment() : uri.getPath();
    name = new File(name).getName();
    File localCopy = new File(localCopyDir, name);
    if (localCopy.exists()) {
        throw new IOException(String.format("A file with name %s has already been uploaded.", name));
    }
    Configuration conf = jc.sc().sc().hadoopConfiguration();
    FileSystem fs = FileSystem.get(uri, conf);
    fs.copyToLocalFile(new Path(uri), new Path(localCopy.toURI()));
    MutableClassLoader cl = (MutableClassLoader) Thread.currentThread().getContextClassLoader();
    cl.addURL(localCopy.toURI().toURL());
    jc.sc().addJar(path);
    return null;
}