List of usage examples for org.apache.hadoop.fs FileSystem copyToLocalFile
public void copyToLocalFile(Path src, Path dst) throws IOException
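Before the full examples below, a minimal, self-contained sketch of the call itself. The configuration and both paths are illustrative placeholders, not taken from any of the examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Copy an HDFS file (or directory) to the local file system.
        // Both paths are hypothetical placeholders.
        Path src = new Path("/user/hadoop/output/part-r-00000");
        Path dst = new Path("/tmp/part-r-00000");
        fs.copyToLocalFile(src, dst);

        fs.close();
    }
}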
From source file: pegasus.SSSPResultInfo.java
License: Apache License
public int run(final String[] args) throws Exception {
    if (args.length != 8) {
        return printUsage();
    }

    edge_path = new Path(args[0]);
    csr_path = new Path(args[1]);
    tempbm_path = new Path(args[2]);
    nextbm_path = new Path(args[3]);
    output_path = new Path(args[4]);
    summaryout_path = new Path("concmpt_summaryout");
    number_nodes = Integer.parseInt(args[5]);
    nreducers = Integer.parseInt(args[6]);

    if (args[7].compareTo("makesym") == 0)
        make_symmetric = 1;
    else
        make_symmetric = 0;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");
    System.out.println("[PEGASUS] Computing breadth-first search. Edge path = " + args[0]
            + ", Reducers = " + nreducers);

    local_output_path = args[4] + "_temp";

    JobClient.runJob(configStage1());
    FileSystem.get(getConf()).rename(csr_path, tempbm_path);

    // Iteratively calculate the neighborhood function.
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        JobClient.runJob(configStage2());
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        final FileSystem fs = FileSystem.get(getConf());

        // Copy neighborhood information from HDFS to local disk, and read it.
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));

        SSSPResultInfo ri = readIterationOutput(new_path);

        white_nodes[iter_counter] = ri.white;
        gray_nodes[iter_counter] = ri.gray;
        black_nodes[iter_counter] = ri.black;

        iter_counter++;

        System.out.println("Hop " + i + " : white = " + ri.white + ", gray = " + ri.gray
                + ", black = " + ri.black);

        // Stop when no gray (frontier) vertices remain.
        if (ri.gray == 0) {
            System.out.println("All vertices have been reached. Finishing...");
            //fs.delete(csr_path);
            fs.delete(tempbm_path);
            fs.delete(output_path);
            fs.rename(nextbm_path, output_path);
            break;
        }

        // Rotate directories for the next iteration.
        //fs.delete(csr_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, tempbm_path);
    }

    // FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // Calculate summary information using an additional stage.
    //System.out.println("Summarizing connected components information...");
    //JobClient.runJob(configStage4());

    // Finishing.
    System.out.println("\n[PEGASUS] Breadth-first search computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    System.out.println("[PEGASUS] BFS distance labels are saved in the HDFS sssp_output as\n"
            + "\"vertex vertex number distance\" format.\n");

    return 0;
}
From source file: pl.edu.icm.coansys.heeut.TestMapReduce.java
License: Apache License
@Test(timeout = 1800000)
public void testWordCountDiff() throws Exception {
    String prefix = getCurrentDateAppended("wordcount");
    String inputDirName = prefix + "-input";
    String outputDirName = prefix + "-output";

    FileSystem dfs = UTIL.getDFSCluster().getFileSystem();
    Path inputDir = new Path(inputDirName);
    Path qualifiedInputDir = dfs.makeQualified(inputDir);

    String inputFileName = "src/test/resource/input/wordcount/apache_projects.dat";
    dfs.copyFromLocalFile(new Path(inputFileName), qualifiedInputDir);

    ToolRunner.run(UTIL.getConfiguration(), new WordCount(), new String[] { inputDirName, outputDirName });

    Path outputDir = new Path(outputDirName);
    Path qualifiedOutputDir = dfs.makeQualified(outputDir);
    String localOutputDir = "src/test/resource/output/wordcount/" + prefix;
    dfs.copyToLocalFile(qualifiedOutputDir, new Path(localOutputDir));

    File outputFile = new File(localOutputDir + "/part-00000");
    File expectedFile = new File("src/test/resource/exp/wordcount/apache_projects.exp");
    boolean isEqual = FileUtils.contentEquals(outputFile, expectedFile);
    Assert.assertTrue(isEqual);
}
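The test above qualifies its relative paths against the mini-cluster's URI before copying. A minimal sketch of the same pattern in isolation; the path names here are illustrative, not from the test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedCopy {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // makeQualified resolves a relative path against the FileSystem's
        // URI and working directory, e.g. "out" -> "hdfs://host:port/user/me/out",
        // so the subsequent copy is unambiguous about which file system it reads.
        Path qualified = fs.makeQualified(new Path("out"));
        fs.copyToLocalFile(qualified, new Path("/tmp/out"));
        fs.close();
    }
}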
From source file: sa.edu.kaust.twitter.preprocess.DetectRetweets.java
License: Apache License
@SuppressWarnings("unused") public static void runDetectRetweet(String input, String output, String startID, String endID) throws Exception { /*if (args.length != 3) { printUsage();/*from ww w. java2s .co m*/ return; } String input = args[0]; String output = args[1]; int reduceTasks = Integer.parseInt(args[2]);*/ //Path inputPath = new Path("/shared/tweets2011"); //Path outputPath = new Path("/user/telsayed/tweets2011"); Path inputPath = new Path(input); Path outputPath = new Path(output); sLogger.info("input dir: " + inputPath); sLogger.info("output dir: " + outputPath); //sLogger.info("num of output files: " + reduceTasks); int mapTasks = 100; Configuration conf = new Configuration(); conf.set("startID", startID); conf.set("endID", endID); FileSystem fs = FileSystem.get(conf); Job job = new Job(conf, "DetectRetweets"); job.setJarByClass(DetectRetweets.class); /*if (fs.exists(outputPath)) { sLogger.info("Output path already exist: skipping!"); return; }*/ job.setNumReduceTasks(1); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); //conf.set("mapred.child.java.opts", "-Xmx2048m"); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // delete the output directory if it exists already FileSystem.get(conf).delete(new Path(output), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); fs.copyToLocalFile(new Path(output + "/part-r-00000"), new Path("retweet/part-r-00000")); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); }
From source file: smile.wide.algorithms.SMILEBSjob.java
License: Apache License
/**
 * Sets up the Hadoop job, submits it to the cluster,
 * and waits for it to complete.
 */
@Override
public int run(String[] params) throws Exception {
    // params: <trainfile> <output_path> <number of seeds>
    Configuration conf = super.getConf();
    conf.set("trainfile", params[0]);

    // Distributed cache initialization.
    DistributedCache.createSymlink(conf);
    DistributedCache.addFileToClassPath(new Path(libHDFSPath_ + "/smile.jar"), conf);
    DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf);

    // Upload data file to HDFS and add it to the distributed cache.
    FileSystem dfs = FileSystem.get(conf);
    dfs.copyFromLocalFile(new Path(params[0]), new Path(dataHDFSPath_));
    DistributedCache.addCacheFile(new URI(dataHDFSPath_ + basename(params[0]) + "#" + basename(params[0])),
            conf);

    // For now, keep the Bayesian search parameters constant.
    conf.setInt("iterationCount", iterationCount);
    conf.setFloat("linkProbability", linkProbability);
    conf.setInt("maxParents", maxParents);
    conf.setInt("maxSearchTime", maxSearchTime);
    conf.setFloat("priorLinkProbability", priorLinkProbability);
    conf.setInt("priorSampleSize", priorSampleSize);

    // conf.setInt(RandSeedInputFormat.CONFKEY_SEED_COUNT, Integer.parseInt(params[2]));
    conf.setInt(RandSeedInputFormat.CONFKEY_WARMUP_ITER, 100000);
    conf.setLong("mapred.task.timeout", 3600000);

    Job job = new Job(conf);
    job.setJobName("Distributed Bayesian Search");
    job.setJarByClass(SMILEBSjob.class);
    job.setMapperClass(SMILEBSMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StrucLogLikeWritable.class);
    job.setReducerClass(SMILEBSReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RandSeedInputFormat.class);

    Path outputPath = new Path(params[1]);
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    // Run the job.
    job.waitForCompletion(true);

    // Download the result. Note that Path.suffix() returns a new Path rather
    // than mutating outputPath, so the suffixed path is built inline here
    // (the original code also had a standalone, no-op outputPath.suffix(...) call).
    dfs.copyToLocalFile(outputPath.suffix("/part-r-00000"), new Path("./smile-output.txt"));
    return 0;
}
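The example above leaves the HDFS copy of the result in place after downloading it. FileSystem also provides an overload that deletes the source once the copy succeeds. A minimal sketch; the paths are illustrative placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyAndCleanUp {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // delSrc = true removes the HDFS source after the local copy succeeds,
        // which saves a separate delete call when the remote output is disposable.
        fs.copyToLocalFile(true, new Path("/user/hadoop/results/part-r-00000"),
                new Path("./results.txt"));
        fs.close();
    }
}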