List of usage examples for org.apache.hadoop.fs.FileSystem copyToLocalFile
public void copyToLocalFile(Path src, Path dst) throws IOException
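copyToLocalFile copies a file from the configured filesystem (typically HDFS) to the local disk. Before the project-specific examples below, here is a minimal sketch of the call on its own; the source and destination paths are hypothetical placeholders, not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalExample {
    public static void main(String[] args) throws Exception {
        // Load the default Hadoop configuration (core-site.xml / hdfs-site.xml on the classpath)
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical paths: a source file on the FileSystem and a local destination
        Path src = new Path("/tmp/input/part-0");
        Path dst = new Path("/tmp/local-copy/part-0");

        // Copy the file from the FileSystem (e.g. HDFS) to the local filesystem
        fs.copyToLocalFile(src, dst);
    }
}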
From source file:edu.uci.ics.hyracks.hdfs2.dataflow.DataflowTest.java
License:Apache License
/**
 * Check if the results are correct
 *
 * @return true if correct
 * @throws Exception
 */
private boolean checkResults() throws Exception {
    FileSystem dfs = FileSystem.get(conf.getConfiguration());
    Path result = new Path(HDFS_OUTPUT_PATH);
    Path actual = new Path(ACTUAL_RESULT_DIR);
    dfs.copyToLocalFile(result, actual);
    TestUtils.compareWithResult(new File(EXPECTED_RESULT_PATH + File.separator + "part-0"),
            new File(ACTUAL_RESULT_DIR + File.separator + "customer_result" + File.separator + "part-0"));
    return true;
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.TFIDFDriverImpl.java
License:Apache License
/**
 * @param inputPath
 */
public void executeTFIDF(String inputPath) {
    try {
        File items = new File(INPUT_ITEMSET);
        if (!items.exists()) {
            throw new IOException(items.getAbsoluteFile() + " not found");
        }
        String OUTPUT_PATH1 = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-1-word-freq";
        if (items.length() < 200000000) {
            String AVRO_FILE = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-avro";
            Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO, "Starting text2Avro");
            text2Avro(inputPath, AVRO_FILE);
            Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO,
                    "Starting WordFrequencyInDocDriver: {0},{1},{2},{3},{4}",
                    new Object[] { AVRO_FILE, OUTPUT_PATH1, INPUT_ITEMSET, NUM_OF_LINES, STOPWORDS_PATH });
            String[] args1 = { AVRO_FILE, OUTPUT_PATH1, INPUT_ITEMSET, STOPWORDS_PATH };
            ToolRunner.run(new WordFrequencyInDocDriver(), args1);
        } else {
            Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO, "Starting TermWordFrequency");
            String[] args1 = { INPUT_ITEMSET, OUTPUT_PATH1, inputPath, STOPWORDS_PATH, NUM_OF_LINES };
            ToolRunner.run(new TermWordFrequency(), args1);
        }
        String OUTPUT_PATH2 = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-2-word-counts";
        String[] args2 = { OUTPUT_PATH1, OUTPUT_PATH2 };
        ToolRunner.run(new WordCountsForDocsDriver(), args2);

        File docs = new File(inputPath);
        File[] files = docs.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".txt");
            }
        });
        Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO, "docs:{0}", docs.getAbsolutePath());
        int numberOfDocuments = files.length;

        String OUTPUT_PATH3 = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-3-tf-idf";
        String[] args3 = { OUTPUT_PATH2, OUTPUT_PATH3, String.valueOf(numberOfDocuments) };
        ToolRunner.run(new WordsInCorpusTFIDFDriver(), args3);

        StringBuilder fileNames = new StringBuilder();
        String prefix = "";
        for (File name : files) {
            if (name.isFile() && FilenameUtils.getExtension(name.getName()).endsWith("txt")) {
                fileNames.append(prefix);
                prefix = ",";
                fileNames.append(FilenameUtils.removeExtension(name.getName()).replaceAll("_", ""));
            }
        }
        String OUTPUT_PATH4 = System.currentTimeMillis() + "_" + UUID.randomUUID() + "-TFIDFDriverImpl-4-distances";
        String[] args4 = { OUTPUT_PATH3, OUTPUT_PATH4, COMPETENCES_PATH, fileNames.toString() };
        Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO, "args4:{0}", Arrays.toString(args4));
        ToolRunner.run(new CompetencesDistanceDriver(), args4);

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path hdfsRes = new Path(OUTPUT_PATH4);
        FileStatus[] results = fs.listStatus(hdfsRes);
        for (FileStatus s : results) {
            Path dest = new Path(OUT + "/" + s.getPath().getName());
            Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.INFO, "Copy: {0} to: {1}",
                    new Object[] { s.getPath(), dest });
            fs.copyToLocalFile(s.getPath(), dest);
        }
        fs.delete(hdfsRes, true);
    } catch (Exception ex) {
        Logger.getLogger(TFIDFDriverImpl.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:eu.scape_project.tb.chutney.Tools.java
License:Apache License
/**
 * Copy an input file to a local temporary file and return the new local filename
 * @param pTmpDir temporary directory to copy files to
 * @param pFs The HDFS filesystem
 * @param pInputFile The URL/name of the input file
 * @return A File instance for the new local temporary file
 * @throws IOException file access error
 */
public static File copyInputToLocalTemp(File pTmpDir, FileSystem pFs, String pInputFile) throws IOException {
    // put the file in the new temporary directory
    // we use lastIndexOf as the path may start with hdfs:// and File doesn't understand that
    System.out.println(pTmpDir + "<-" + pInputFile);
    String tempFile = pTmpDir.getAbsolutePath();
    if (pInputFile.contains("/")) {
        tempFile += (pInputFile.substring(pInputFile.lastIndexOf("/")));
    } else {
        tempFile += "/" + pInputFile;
    }
    File tempInputFile = new File(tempFile);
    // if this file has already been copied - skip
    // FIXME: this thinks that the file exists when it doesn't
    if (tempInputFile.exists())
        return tempInputFile;
    // i.e. this file is a local file
    if (new File(pInputFile).exists()) {
        FileInputStream fis = new FileInputStream(pInputFile);
        FileOutputStream fos = new FileOutputStream(tempInputFile);
        byte[] buffer = new byte[Settings.BUFSIZE];
        int bytesRead = 0;
        while (fis.available() > 0) {
            bytesRead = fis.read(buffer);
            fos.write(buffer, 0, bytesRead);
        }
        fis.close();
        fos.close();
        return tempInputFile;
    }
    // this file is in HDFS
    if (pFs.exists(new Path(pInputFile))) {
        pFs.copyToLocalFile(new Path(pInputFile), new Path(tempFile));
        tempInputFile = new File(tempFile);
        return tempInputFile;
    }
    // TODO: check for HTTP files etc
    System.out.println("file not found");
    return null;
}
From source file:gobblin.source.extractor.extract.google.GoogleCommon.java
License:Apache License
private static File copyToLocal(FileSystem fs, Path keyPath) throws IOException {
    java.nio.file.Path tmpKeyPath = Files.createTempFile(GoogleCommon.class.getSimpleName(), "tmp",
            PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
    File copied = tmpKeyPath.toFile();
    copied.deleteOnExit();
    fs.copyToLocalFile(keyPath, new Path(copied.getAbsolutePath()));
    return copied;
}
From source file:io.apigee.lembos.utils.RunnerUtils.java
License:Apache License
/**
 * Takes a module path, which could be a local filesystem path or a url, and returns the local path to the module.
 *
 * <b>Note:</b> If the value is a URL, the URL will be downloaded locally to create the necessary filesystem
 * location for the Node.js module to allow for archiving and adding to DistributedCache.
 *
 * @param conf the Hadoop configuration
 *
 * @return the local filesystem path to the module
 *
 * @throws IOException if anything goes wrong
 */
public static File createLocalCopyOfModule(final Configuration conf) throws IOException {
    final String moduleName = conf.get(LembosConstants.MR_MODULE_NAME);
    final String modulePath = conf.get(LembosConstants.MR_MODULE_PATH);
    File localTempModule = null;

    if (moduleName != null && !moduleName.trim().isEmpty() && modulePath != null && !modulePath.trim().isEmpty()) {
        URL moduleUrl;

        // Test if this is a URL or a file
        try {
            moduleUrl = new URL(modulePath);
        } catch (MalformedURLException e) {
            // This is to be expected if the configuration path is not a URL
            moduleUrl = null;
        }

        // Create a local temporary directory to contain the Node.js module
        final java.nio.file.Path tmpDir = Files.createTempDirectory("LembosMapReduceModule");
        FileSystem fs;

        // Delete the temp directory
        tmpDir.toFile().deleteOnExit();

        // Create the proper FileSystem
        if (moduleUrl == null) {
            fs = FileSystem.getLocal(conf);
        } else {
            try {
                fs = FileSystem.get(moduleUrl.toURI(), conf);
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
        }

        final org.apache.hadoop.fs.Path pathObj = new org.apache.hadoop.fs.Path(modulePath);

        if (fs.exists(pathObj)) {
            final org.apache.hadoop.fs.Path tmpPathObj = new org.apache.hadoop.fs.Path(
                    tmpDir.toAbsolutePath().toString());

            // Copy the local/remote file(s) to the temporary directory
            fs.copyToLocalFile(pathObj, tmpPathObj);

            final File moduleFile = new File(
                    new org.apache.hadoop.fs.Path(tmpPathObj, pathObj.getName()).toString());

            // Set the MapReduce module path accordingly
            if (moduleFile.isFile()) {
                final String fileName = moduleFile.getName();
                boolean wasArchive = false;

                if (fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
                    FileUtil.unTar(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                } else if (fileName.endsWith(".zip")) {
                    FileUtil.unZip(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                }

                if (wasArchive) {
                    for (final String extension : KNOWN_NODE_MODULE_EXTENSIONS) {
                        final File potentialModuleFile = new File(tmpDir.toFile(), moduleName + extension);

                        if (potentialModuleFile.exists()) {
                            localTempModule = potentialModuleFile;
                            break;
                        }
                    }
                } else {
                    localTempModule = moduleFile;
                }
            } else {
                localTempModule = new File(tmpDir.toFile(), moduleName);
            }
        } else {
            throw new RuntimeException("Unable to create/locate Node.js module locally: " + modulePath);
        }
    }

    if (localTempModule == null) {
        throw new RuntimeException("Unable to create local copy of Node.js module from path: "
                + conf.get(LembosConstants.MR_MODULE_PATH));
    }

    return localTempModule;
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPuller.java
License:Apache License
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    final LocalFileSystem localFileSystem = new LocalFileSystem();
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // -------- directory ---------

            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }

                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);

                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                                outDir.getAbsolutePath());
                        return result;
                    }
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // -------- zip ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // -------- gzip ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.generic.SOFMapperGeneric.java
License:Apache License
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        (new File(tmpName)).mkdir();
        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));
        SimulationGeneric genericsim = new SimulationGeneric();
        String SIM_PROGRAM_NAME = SIM_PROGRAM.substring(SIM_PROGRAM.lastIndexOf("/") + 1, SIM_PROGRAM.length());
        genericsim.run(tmpName + "/" + SIM_PROGRAM_NAME, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);
        (new File(tmpName)).delete();
    } catch (Throwable e) {
        e.printStackTrace();
    }
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.generic.SOFReducerGeneric.java
License:Apache License
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String EVALUATION_PROGRAM_THREAD = "evaluation" + Thread.currentThread().getId();
    FileSystem fs = FileSystem.get(conf);

    if (ISLOOP) {
        Path eprogram = new Path(EVALUATION_PROGRAM_THREAD);
        fs.copyToLocalFile(new Path(RATING_PROGRAM), eprogram);
        try {
            fs.mkdirs(new Path(this.RATING_PATH));
        } catch (Exception e) {
        }
    }

    if (ISLOOP) {
        Random r = new Random(System.currentTimeMillis());
        String id = MD5(key.toString() + r.nextDouble());
        String tmpEvalXml = "tmpEval" + id + ".xml";
        Path ptemp = new Path(tmpEvalXml);
        Path file_output = new Path(key.toString());
        fs.copyToLocalFile(file_output, ptemp);
        String xmlOutput = key.toString().substring(key.toString().lastIndexOf("/") + 1);
        //generateEvaluation(tmpEvalXml, id, EVALUATION_PROGRAM_THREAD);
        generateEvaluation(tmpEvalXml, xmlOutput, EVALUATION_PROGRAM_THREAD);
        File f = new File(System.getProperty("user.dir") + "/" + EVALUATION_PROGRAM_THREAD);
        f.delete();
    }
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.mason.SOFMapperMason.java
License:Apache License
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        (new File(tmpName)).mkdir();
        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));
        SimulationMASON masonsim = new SimulationMASON();
        String SIM_PROGRAM_NAME = SIM_PROGRAM.substring(SIM_PROGRAM.lastIndexOf("/") + 1, SIM_PROGRAM.length());
        masonsim.run(tmpName + "/" + SIM_PROGRAM_NAME, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);
        (new File(tmpName)).delete();
    } catch (Throwable e) {
        e.printStackTrace();
    }
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.netlogo.SOFMapperNetLogo.java
License:Apache License
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        (new File(tmpName)).mkdir();
        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(new Path(SIM_PROGRAM), new Path(tmpName));
        String execName = SIM_PROGRAM.substring(SIM_PROGRAM.lastIndexOf("/"), SIM_PROGRAM.length());
        SimulationNETLOGO netlogosim = new SimulationNETLOGO();
        netlogosim.run(tmpName + "/" + execName, value.toString(), SIM_INPUT_MAPPER, SIM_OUTPUT_MAPPER,
                SIMULATION_HOME, output, conf);
        (new File(tmpName)).delete();
    } catch (Throwable e) {
        e.printStackTrace();
    }
}