List of usage examples for org.apache.hadoop.fs FileSystem rename
/**
 * Signature of {@code org.apache.hadoop.fs.FileSystem#rename}, the method whose call sites are
 * shown in the examples below.
 *
 * @param src the path handed to the rename as its source
 * @param dst the path handed to the rename as its destination
 * @return a boolean result; presumably {@code true} when the rename succeeded (confirm against
 *         the Hadoop API documentation). Note that the examples below ignore this value.
 * @throws IOException declared by the signature
 */
public abstract boolean rename(Path src, Path dst) throws IOException;
From source file:pathmerge.linear.MergePathH1Driver.java
License:Apache License
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath) throws IOException { JobConf conf = new JobConf(MergePathH1Driver.class); conf.setInt("sizeKmer", sizeKmer); if (defaultConfPath != null) { conf.addResource(new Path(defaultConfPath)); }//from w w w .jav a 2 s .co m conf.setJobName("Initial Path-Starting-Points Table"); conf.setMapperClass(SNodeInitialMapper.class); conf.setReducerClass(SNodeInitialReducer.class); conf.setMapOutputKeyClass(Kmer.class); conf.setMapOutputValueClass(MergePathValueWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); String singlePointPath = "comSinglePath0"; MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class, MergePathValueWritable.class); conf.setOutputKeyClass(VKmerBytesWritable.class); conf.setOutputValueClass(MergePathValueWritable.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext")); conf.setNumReduceTasks(numReducers); FileSystem dfs = FileSystem.get(conf); dfs.delete(new Path(inputPath + "stepNext"), true); JobClient.runJob(conf); dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/" + singlePointPath)); int iMerge = 0; /*----------------------------------------------------------------------*/ for (iMerge = 1; iMerge <= mergeRound; iMerge++) { // if (!dfs.exists(new Path(inputPath + "-step1"))) // break; conf = new JobConf(MergePathH1Driver.class); conf.setInt("sizeKmer", sizeKmer); conf.setInt("iMerge", iMerge); if (defaultConfPath != null) { conf.addResource(new Path(defaultConfPath)); } conf.setJobName("Path Merge"); conf.setMapperClass(MergePathH1Mapper.class); conf.setReducerClass(MergePathH1Reducer.class); conf.setMapOutputKeyClass(VKmerBytesWritable.class); 
conf.setMapOutputValueClass(MergePathValueWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); String uncompSinglePath = "uncompSinglePath" + iMerge; String comSinglePath = "comSinglePath" + iMerge; String comCircle = "comCircle" + iMerge; MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class, MergePathValueWritable.class); MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class, MergePathValueWritable.class); MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class, MergePathValueWritable.class); conf.setOutputKeyClass(VKmerBytesWritable.class); conf.setOutputValueClass(MergePathValueWritable.class); FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext")); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setNumReduceTasks(numReducers); dfs.delete(new Path(outputPath), true); JobClient.runJob(conf); dfs.delete(new Path(inputPath + "stepNext"), true); dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext")); dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath)); dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle)); } }
From source file:pathmerge.log.MergePathH2Driver.java
License:Apache License
/**
 * Runs the path-merge pipeline of this driver: one "Initial Path-Starting-Points Table" job
 * followed by {@code mergeRound} "Path Merge" jobs that use the H2 mapper and reducer.
 *
 * <p>The initial job reads {@code inputPath} and writes to {@code inputPath + "stepNext"}; its
 * {@code comSinglePath0} named output is then moved under {@code mergeResultPath}. Each merge
 * round reads {@code inputPath + "stepNext"}, writes to {@code outputPath}, replaces the
 * step directory with that round's {@code uncompSinglePath<round>} output, and moves the
 * {@code comSinglePath<round>} and {@code comCircle<round>} outputs under
 * {@code mergeResultPath}.
 *
 * @param inputPath       input location of the initial job; also the prefix of the step directory
 * @param outputPath      output directory of every merge round (deleted before each round)
 * @param mergeResultPath directory that receives the completed named outputs
 * @param numReducers     number of reduce tasks for every job
 * @param sizeKmer        value stored in the job configuration under {@code "sizeKmer"}
 * @param mergeRound      number of merge rounds to run
 * @param defaultConfPath optional extra configuration resource; ignored when {@code null}
 * @throws IOException if a file-system operation or a job fails
 */
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
        int mergeRound, String defaultConfPath) throws IOException {
    final String stepNextDir = inputPath + "stepNext";
    final String initialOutputName = "comSinglePath0";

    // ---- Job 0: build the table of path starting points ----
    JobConf initConf = new JobConf(MergePathH2Driver.class);
    initConf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        initConf.addResource(new Path(defaultConfPath));
    }
    initConf.setJobName("Initial Path-Starting-Points Table");
    initConf.setMapperClass(SNodeInitialMapper.class);
    initConf.setReducerClass(SNodeInitialReducer.class);
    initConf.setMapOutputKeyClass(Kmer.class);
    initConf.setMapOutputValueClass(MergePathValueWritable.class);
    initConf.setInputFormat(SequenceFileInputFormat.class);
    initConf.setOutputFormat(SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(initConf, initialOutputName, MergePathMultiSeqOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    initConf.setOutputKeyClass(VKmerBytesWritable.class);
    initConf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(initConf, new Path(inputPath));
    FileOutputFormat.setOutputPath(initConf, new Path(stepNextDir));
    initConf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(initConf);
    dfs.delete(new Path(stepNextDir), true);
    JobClient.runJob(initConf);
    dfs.rename(new Path(stepNextDir + "/" + initialOutputName),
            new Path(mergeResultPath + "/" + initialOutputName));

    // ---- Jobs 1..mergeRound: one merge pass per round ----
    for (int round = 1; round <= mergeRound; round++) {
        final String uncompleteName = "uncompSinglePath" + round;
        final String completeName = "comSinglePath" + round;
        final String circleName = "comCircle" + round;

        JobConf mergeConf = new JobConf(MergePathH2Driver.class);
        mergeConf.setInt("sizeKmer", sizeKmer);
        mergeConf.setInt("iMerge", round);
        if (defaultConfPath != null) {
            mergeConf.addResource(new Path(defaultConfPath));
        }
        mergeConf.setJobName("Path Merge");
        mergeConf.setMapperClass(MergePathH2Mapper.class);
        mergeConf.setReducerClass(MergePathH2Reducer.class);
        mergeConf.setMapOutputKeyClass(VKmerBytesWritable.class);
        mergeConf.setMapOutputValueClass(MergePathValueWritable.class);
        mergeConf.setInputFormat(SequenceFileInputFormat.class);
        for (String namedOutput : new String[] { uncompleteName, completeName, circleName }) {
            MultipleOutputs.addNamedOutput(mergeConf, namedOutput, MergePathMultiSeqOutputFormat.class,
                    VKmerBytesWritable.class, MergePathValueWritable.class);
        }
        mergeConf.setOutputKeyClass(VKmerBytesWritable.class);
        mergeConf.setOutputValueClass(MergePathValueWritable.class);
        FileInputFormat.setInputPaths(mergeConf, new Path(stepNextDir));
        FileOutputFormat.setOutputPath(mergeConf, new Path(outputPath));
        mergeConf.setNumReduceTasks(numReducers);

        dfs.delete(new Path(outputPath), true);
        JobClient.runJob(mergeConf);

        // The not-yet-complete paths become the input of the next round.
        dfs.delete(new Path(stepNextDir), true);
        dfs.rename(new Path(outputPath + "/" + uncompleteName), new Path(stepNextDir));
        // Completed paths and circles are collected under the merge result directory.
        dfs.rename(new Path(outputPath + "/" + completeName), new Path(mergeResultPath + "/" + completeName));
        dfs.rename(new Path(outputPath + "/" + circleName), new Path(mergeResultPath + "/" + circleName));
    }
}
From source file:pegasus.ResultInfo.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 9) { return printUsage(); }/*from ww w . j av a 2s. com*/ edge_path = new Path(args[0]); curbm_path = new Path(args[1]); tempbm_path = new Path(args[2]); nextbm_path = new Path(args[3]); output_path = new Path(args[4]); summaryout_path = new Path("concmpt_summaryout"); number_nodes = Integer.parseInt(args[5]); nreducers = Integer.parseInt(args[6]); if (args[7].compareTo("new") == 0) start_from_newbm = 1; else { // args[7] == contNN e.g.) cont10 start_from_newbm = 0; cur_iter = Integer.parseInt(args[7].substring(4)); System.out.println("Starting from cur_iter = " + cur_iter); } if (args[8].compareTo("makesym") == 0) make_symmetric = 1; else make_symmetric = 0; System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing connected component. Edge path = " + args[0] + ", Newbm = " + args[7] + ", Reducers = " + nreducers); local_output_path = args[4] + "_temp"; if (start_from_newbm == 1) { System.out.print("Generating initial component vector for " + number_nodes + " nodes "); // create bitmask generate command file, and copy to curbm_path gen_component_vector_file(number_nodes, curbm_path); System.out.println(" done"); } else { System.out.println("Resuming from current component vector at radius(" + cur_iter + ")"); } // Iteratively calculate neighborhood function. for (int i = cur_iter; i < MAX_ITERATIONS; i++) { cur_iter++; JobClient.runJob(configStage1()); JobClient.runJob(configStage2()); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); final FileSystem fs = FileSystem.get(getConf()); // copy neighborhood information from HDFS to local disk, and read it! 
String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); ResultInfo ri = readIterationOutput(new_path); changed_nodes[iter_counter] = ri.changed; changed_nodes[iter_counter] = ri.unchanged; iter_counter++; System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged); // Stop when the minimum neighborhood doesn't change if (ri.changed == 0) { System.out.println("All the component ids converged. Finishing..."); fs.delete(curbm_path); fs.delete(tempbm_path); fs.delete(output_path); fs.rename(nextbm_path, curbm_path); break; } // rotate directory fs.delete(curbm_path); fs.delete(tempbm_path); fs.delete(output_path); fs.rename(nextbm_path, curbm_path); } FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // calculate summary information using an additional stage System.out.println("Summarizing connected components information..."); JobClient.runJob(configStage4()); // finishing. System.out.println("\n[PEGASUS] Connected component computed."); System.out.println("[PEGASUS] Total Iteration = " + iter_counter); System.out.println( "[PEGASUS] Connected component information is saved in the HDFS concmpt_curbm as\n\"node_id 'msf'component_id\" format"); System.out.println( "[PEGASUS] Connected component distribution is saved in the HDFS concmpt_summaryout as\n\"component_id number_of_nodes\" format.\n"); return 0; }
From source file:pegasus.ConCmptBlock.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 9) { return printUsage(); }//ww w .j a va 2 s .co m int i; edge_path = new Path(args[0]); curbm_path = new Path(args[1]); tempbm_path = new Path(args[2]); nextbm_path = new Path(args[3]); output_path = new Path(args[4]); curbm_unfold_path = new Path("concmpt_curbm"); summaryout_path = new Path("concmpt_summaryout"); number_nodes = Integer.parseInt(args[5]); nreducers = Integer.parseInt(args[6]); if (args[7].compareTo("fast") == 0) recursive_diagmult = 1; else recursive_diagmult = 0; block_width = Integer.parseInt(args[8]); System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing connected component using block method. Reducers = " + nreducers + ", block_width = " + block_width); local_output_path = args[4] + "_temp"; // Iteratively calculate neighborhood function. for (i = cur_radius; i < MAX_ITERATIONS; i++) { cur_radius++; iter_counter++; JobClient.runJob(configStage1()); JobClient.runJob(configStage2()); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); final FileSystem fs = FileSystem.get(getConf()); // copy neighborhood information from HDFS to local disk, and read it! String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); ResultInfo ri = ConCmpt.readIterationOutput(new_path); changed_nodes[iter_counter] = ri.changed; changed_nodes[iter_counter] = ri.unchanged; System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged); // Stop when the minimum neighborhood doesn't change if (ri.changed == 0) { System.out.println("All the component ids converged. 
Finishing..."); fs.delete(curbm_path); fs.delete(tempbm_path); fs.delete(output_path); fs.rename(nextbm_path, curbm_path); System.out.println("Unfolding the block structure for easy lookup..."); JobClient.runJob(configStage4()); break; } // rotate directory fs.delete(curbm_path); fs.delete(tempbm_path); fs.delete(output_path); fs.rename(nextbm_path, curbm_path); } FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // calculate summary information using an additional pass System.out.println("Summarizing connected components information..."); JobClient.runJob(configStage5()); // finishing. System.out.println("\n[PEGASUS] Connected component computed."); System.out.println("[PEGASUS] Total Iteration = " + iter_counter); System.out.println( "[PEGASUS] Connected component information is saved in the HDFS concmpt_curbm as\n\"node_id 'msf'component_id\" format"); System.out.println( "[PEGASUS] Connected component distribution is saved in the HDFS concmpt_summaryout as\n\"component_id number_of_nodes\" format.\n"); return 0; }
From source file:pegasus.hadi.Hadi.java
License:Apache License
public int run(final String[] args) throws Exception { int i;/*from w w w . j a v a2s . c o m*/ int max_iteration = MAX_ITERATIONS; if (args.length != 12) { return printUsage(); } edge_path = new Path(args[0]); curbm_path = new Path(args[1]); tempbm_path = new Path(args[2]); nextbm_path = new Path(args[3]); output_path = new Path(args[4]); number_nodes = Integer.parseInt(args[5]); radius_path = new Path("hadi_radius"); radius_summary_path = new Path("hadi_radius_summary"); nreplication = Integer.parseInt(args[6]); nreducer = Integer.parseInt(args[7]); if (args[8].compareTo("enc") == 0) encode_bitmask = 1; if (args[9].compareTo("newbm") == 0) { start_from_newbm = 1; } else if (args[9].startsWith("cont")) { start_from_newbm = 0; cur_radius = Integer.parseInt(args[9].substring(4)); } if (args[10].compareTo("makesym") == 0) make_symmetric = 1; else make_symmetric = 0; if (args[11].compareTo("max") != 0) max_iteration = Integer.parseInt(args[11]); System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing Radii/Diameter. Current hop: " + cur_radius + ", " + "edge_path: " + args[0] + ", encode: " + encode_bitmask + ", # reducers: " + nreducer + ", makesym: " + make_symmetric + ", max_iteration: " + max_iteration + "\n"); local_output_path = args[4] + number_nodes + "_temp"; if (start_from_newbm == 1) { System.out.print("Generating initial bitstrings for " + number_nodes + " nodes "); // create bitmask generate command file, and copy to curbm_path gen_bitmask_cmd_file(number_nodes, nreplication, curbm_path); System.out.println(" done"); } else { System.out.println("Resuming from current hadi_curbm which contains up to N(" + (cur_radius - 1) + ")"); } N[0] = number_nodes; boolean eff_diameter_computed = false; // Iteratively run Stage1 to Stage3. 
for (i = cur_radius; i <= max_iteration; i++) { JobClient.runJob(configStage1(edge_type)); JobClient.runJob(configStage2()); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); final FileSystem fs = FileSystem.get(getConf()); // copy neighborhood information from HDFS to local disk, and read it! String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); HadiResultInfo ri = HadiUtils.readNhoodOutput(new_path); N[i] = ri.nh; iter_counter++; System.out.println( "Nh(" + i + "):\t" + N[i] + "\tGuessed Radius(" + (i - 1) + "):\t" + ri.converged_nodes); // Stop when all radii converged. if (ri.changed_nodes == 0) {//if( i > 1 && N[i] == N[i-1] ) { System.out.println("All the bitstrings converged. Finishing..."); fs.delete(curbm_path); fs.delete(tempbm_path); fs.rename(nextbm_path, curbm_path); System.out.println("Calculating the effective diameter..."); JobClient.runJob(configStage4()); eff_diameter_computed = true; break; } // rotate directory. 
fs.delete(curbm_path); fs.delete(tempbm_path); if (i < MAX_ITERATIONS - 1) fs.delete(output_path); fs.rename(nextbm_path, curbm_path); cur_radius++; } if (eff_diameter_computed == false) { System.out.println("Calculating the effective diameter..."); JobClient.runJob(configStage4()); } // Summarize Radius Information System.out.println("Summarizing radius information..."); JobClient.runJob(configStage5()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // print summary information if (i > max_iteration) System.out.println("Reached Max Iteartion " + max_iteration); System.out.println("Total Iteration = " + iter_counter + "."); System.out.println("Neighborhood Summary:"); for (int j = 0; j <= (i); j++) System.out.println("\tNh(" + (j) + "):\t" + N[j]); System.out.println("\n[PEGASUS] Radii and diameter computed."); System.out.println("[PEGASUS] Maximum diameter: " + (cur_radius - 1)); System.out.println("[PEGASUS] Average diameter: " + HadiUtils.average_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] 90% Effective diameter: " + HadiUtils.effective_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] Radii are saved in the HDFS " + radius_path.getName()); System.out.println("[PEGASUS] Radii summary is saved in the HDFS " + radius_summary_path.getName() + "\n"); return 0; }
From source file:pegasus.hadi.HadiBlock.java
License:Apache License
public int run(final String[] args) throws Exception { int i;/*from w ww .j a v a2s . c om*/ int max_iteration = MAX_ITERATIONS; if (args.length != 12) { return printUsage(); } edge_path = new Path(args[0]); curbm_path = new Path(args[1]); tempbm_path = new Path(args[2]); nextbm_path = new Path(args[3]); output_path = new Path(args[4]); number_nodes = Integer.parseInt(args[5]); radius_path = new Path("hadi_radius_block"); radius_summary_path = new Path("hadi_radius_block_summary"); nreplication = Integer.parseInt(args[6]); nreducer = Integer.parseInt(args[7]); if (args[8].compareTo("enc") == 0) encode_bitmask = 1; if (args[9].compareTo("newbm") == 0) start_from_newbm = 1; else { start_from_newbm = 0; cur_radius = Integer.parseInt(args[9].substring(4)); } block_width = Integer.parseInt(args[10]); if (args[11].compareTo("max") != 0) max_iteration = Integer.parseInt(args[11]); System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing Radii/Diameter using block method. Current hop: " + cur_radius + ", edge_path: " + args[0] + ", encode: " + encode_bitmask + ", # reducers: " + nreducer + ", block width: " + block_width + ", max_iteration: " + max_iteration + "\n"); local_output_path = args[4] + number_nodes + "_tempblk"; N[0] = number_nodes; // Iteratively run Stage1 to Stage3. for (i = cur_radius; i <= max_iteration; i++) { JobClient.runJob(configStage1()); JobClient.runJob(configStage2()); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); final FileSystem fs = FileSystem.get(getConf()); // copy neighborhood information from HDFS to local disk, and read it! 
String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); HadiResultInfo ri = HadiUtils.readNhoodOutput(new_path); N[i] = ri.nh; iter_counter++; System.out.println( "Nh(" + i + "):\t" + N[i] + "\tGuessed Radius(" + (i - 1) + "):\t" + ri.converged_nodes); // Stop when all radii converged. if (ri.changed_nodes == 0) {//if( i > 1 && N[i] == N[i-1] ) { System.out.println("All the bitstrings converged. Finishing..."); fs.delete(curbm_path); fs.delete(tempbm_path); fs.rename(nextbm_path, curbm_path); break; } // rotate directory fs.delete(curbm_path); fs.delete(tempbm_path); if (i < MAX_ITERATIONS - 1) fs.delete(output_path); fs.rename(nextbm_path, curbm_path); cur_radius++; } // Summarize Radius Information System.out.println("Calculating the effective diameter..."); JobClient.runJob(configStage4()); // Summarize Radius Information System.out.println("Summarizing radius information..."); JobClient.runJob(configStage5()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // print summary information if (i > max_iteration) System.out.println("Reached Max Iteartion " + max_iteration); System.out.println("Total Iteration = " + iter_counter + "."); System.out.println("Neighborhood Summary:"); for (int j = 0; j <= (i); j++) System.out.println("\tNh(" + (j) + "):\t" + N[j]); System.out.println("\n[PEGASUS] Radii and diameter computed."); System.out.println("[PEGASUS] Maximum diameter: " + (cur_radius - 1)); System.out.println("[PEGASUS] Average diameter: " + HadiUtils.average_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] 90% Effective diameter: " + HadiUtils.effective_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] Radii are saved in the HDFS " + radius_path.getName()); System.out.println("[PEGASUS] Radii summary is saved in the HDFS " + radius_summary_path.getName() + "\n"); return 0; }
From source file:pegasus.HadiResultInfo.java
License:Apache License
public int run(final String[] args) throws Exception { int i;//from w w w. jav a 2s .c om int max_iteration = MAX_ITERATIONS; if (args.length != 12) { return printUsage(); } edge_path = new Path(args[0]); curbm_path = new Path(args[1]); tempbm_path = new Path(args[2]); nextbm_path = new Path(args[3]); output_path = new Path(args[4]); number_nodes = Integer.parseInt(args[5]); radius_path = new Path("hadi_radius"); radius_summary_path = new Path("hadi_radius_summary"); nreplication = Integer.parseInt(args[6]); nreducer = Integer.parseInt(args[7]); if (args[8].compareTo("enc") == 0) encode_bitmask = 1; if (args[9].compareTo("newbm") == 0) { start_from_newbm = 1; } else if (args[9].startsWith("cont")) { start_from_newbm = 0; cur_radius = Integer.parseInt(args[9].substring(4)); } if (args[10].compareTo("makesym") == 0) make_symmetric = 1; else make_symmetric = 0; if (args[11].compareTo("max") != 0) max_iteration = Integer.parseInt(args[11]); System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing Radii/Diameter. Current hop: " + cur_radius + ", edge_path: " + args[0] + ", encode: " + encode_bitmask + ", # reducers: " + nreducer + ", makesym: " + make_symmetric + ", max_iteration: " + max_iteration + "\n"); local_output_path = args[4] + number_nodes + "_temp"; if (start_from_newbm == 1) { System.out.print("Generating initial bitstrings for " + number_nodes + " nodes "); // create bitmask generate command file, and copy to curbm_path gen_bitmask_cmd_file(number_nodes, nreplication, curbm_path); System.out.println(" done"); } else { System.out.println("Resuming from current hadi_curbm which contains up to N(" + (cur_radius - 1) + ")"); } N[0] = number_nodes; boolean eff_diameter_computed = false; // Iteratively run Stage1 to Stage3. 
for (i = cur_radius; i <= max_iteration; i++) { JobClient.runJob(configStage1(edge_type)); JobClient.runJob(configStage2()); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); final FileSystem fs = FileSystem.get(getConf()); // copy neighborhood information from HDFS to local disk, and read it! String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); HadiResultInfo ri = HadiUtils.readNhoodOutput(new_path); N[i] = ri.nh; iter_counter++; System.out.println( "Nh(" + i + "):\t" + N[i] + "\tGuessed Radius(" + (i - 1) + "):\t" + ri.converged_nodes); // Stop when all radii converged. if (ri.changed_nodes == 0) {//if( i > 1 && N[i] == N[i-1] ) { System.out.println("All the bitstrings converged. Finishing..."); fs.delete(curbm_path); fs.delete(tempbm_path); fs.rename(nextbm_path, curbm_path); System.out.println("Calculating the effective diameter..."); JobClient.runJob(configStage4()); eff_diameter_computed = true; break; } // rotate directory. 
fs.delete(curbm_path); fs.delete(tempbm_path); if (i < MAX_ITERATIONS - 1) fs.delete(output_path); fs.rename(nextbm_path, curbm_path); cur_radius++; } if (eff_diameter_computed == false) { System.out.println("Calculating the effective diameter..."); JobClient.runJob(configStage4()); } // Summarize Radius Information System.out.println("Summarizing radius information..."); JobClient.runJob(configStage5()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // print summary information if (i > max_iteration) System.out.println("Reached Max Iteartion " + max_iteration); System.out.println("Total Iteration = " + iter_counter + "."); System.out.println("Neighborhood Summary:"); for (int j = 0; j <= (i); j++) System.out.println("\tNh(" + (j) + "):\t" + N[j]); System.out.println("\n[PEGASUS] Radii and diameter computed."); System.out.println("[PEGASUS] Maximum diameter: " + (cur_radius - 1)); System.out.println("[PEGASUS] Average diameter: " + HadiUtils.average_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] 90% Effective diameter: " + HadiUtils.effective_diameter(N, cur_radius - 1)); System.out.println("[PEGASUS] Radii are saved in the HDFS " + radius_path.getName()); System.out.println("[PEGASUS] Radii summary is saved in the HDFS " + radius_summary_path.getName() + "\n"); return 0; }
From source file:pegasus.heigen.NormalizeVector.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 4) { return printUsage(); }//from ww w .j ava 2s .co m int i; input_path = new Path(args[0]); output_path = new Path(args[1]); nreducers = Integer.parseInt(args[2]); double additional_multiplier = Double.parseDouble(args[3]); System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println( "[PEGASUS] Normalizing a vector. input_path=" + args[0] + ", output_path=" + args[1] + "\n"); final FileSystem fs = FileSystem.get(getConf()); FileSystem lfs = FileSystem.getLocal(getConf()); // compute l1 norm String[] new_args = new String[1]; new_args[0] = args[0]; ToolRunner.run(getConf(), new L1norm(), new_args); double scalar = PegasusUtils.read_l1norm_result(getConf()); lfs.delete(new Path("l1norm"), true); System.out.println("L1norm = " + scalar); // multiply by scalar new_args = new String[2]; new_args[0] = args[0]; new_args[1] = new String("" + additional_multiplier / scalar); ToolRunner.run(getConf(), new ScalarMult(), new_args); fs.delete(output_path, true); fs.rename(new Path("smult_output"), output_path); System.out.println( "\n[PEGASUS] Normalization completed. The normalized vecotr is saved in HDFS " + args[1] + ".\n"); return 0; }
From source file:pegasus.pagerank.PagerankNaive.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 7) { return printUsage(); }/*from w w w. jav a 2 s .co m*/ int i; edge_path = new Path(args[0]); output_path = new Path(args[1] + "/pr_output"); vector_path = new Path(args[1] + "/pr_vector"); tempmv_path = new Path(args[1] + "/pr_tmp"); minmax_path = new Path(args[1] + "/pr_minmax"); distr_path = new Path(args[1] + "/pr_distr"); number_nodes = Integer.parseInt(args[2]); nreducers = Integer.parseInt(args[3]); niteration = Integer.parseInt(args[4]); if (args[5].compareTo("makesym") == 0) make_symmetric = 1; else make_symmetric = 0; int cur_iteration = 1; if (args[6].startsWith("cont")) cur_iteration = Integer.parseInt(args[6].substring(4)); local_output_path = args[2] + "_temp"; converge_threshold = ((double) 1.0 / (double) number_nodes) / 10; System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing PageRank. Max iteration = " + niteration + ", " + "threshold = " + converge_threshold + ", cur_iteration=" + cur_iteration + "\n"); if (cur_iteration == 1) gen_initial_vector(number_nodes, vector_path); final FileSystem fs = FileSystem.get(vector_path.toUri(), getConf()); // Run pagerank until converges. for (i = cur_iteration; i <= niteration; i++) { long startT = System.currentTimeMillis(); JobClient.runJob(configStage1()); RunningJob job = JobClient.runJob(configStage2()); // The counter is newly created per every iteration. Counters c = job.getCounters(); long changed = c.getCounter(PrCounters.CONVERGE_CHECK); System.out.println("Iteration = " + i + ", changed reducer = " + changed); if (changed == 0) { System.out.println("PageRank vector converged. 
Now preparing to finish..."); fs.delete(vector_path); fs.delete(tempmv_path); fs.rename(output_path, vector_path); break; } // rotate directory fs.delete(vector_path); fs.delete(tempmv_path); fs.rename(output_path, vector_path); long stopT = System.currentTimeMillis(); System.out.printf("PEGASUS pagerank iteration %d costs %.2f sec.\n", i, (stopT - startT) / 1000.0f); } if (i == niteration) { System.out.println("Reached the max iteration. Now preparing to finish..."); } // find min/max of pageranks //System.out.println("Finding minimum and maximum pageranks..."); //JobClient.runJob(configStage3()); //FileUtil.fullyDelete( FileSystem.getLocal(getConf()), new Path(local_output_path)); //String new_path = local_output_path + "/" ; //fs.copyToLocalFile(minmax_path, new Path(new_path) ) ; //MinMaxInfo mmi = readMinMax( new_path ); //System.out.println("min = " + mmi.min + ", max = " + mmi.max ); // find distribution of pageranks //JobClient.runJob(configStage4(mmi.min, mmi.max)); System.out.println("\n[PEGASUS] PageRank computed."); System.out.println("[PEGASUS] The final PageRanks are in the HDFS pr_vector."); //System.out.println("[PEGASUS] The minium and maximum PageRanks are in the HDFS // pr_minmax."); //System.out.println("[PEGASUS] The histogram of PageRanks in 1000 bins between // min_PageRank and max_PageRank are in the HDFS pr_distr.\n"); return 0; }
From source file:pegasus.MinMaxInfo.java
License:Apache License
public int run(final String[] args) throws Exception { if (args.length != 7) { return printUsage(); }/* w w w. j a va 2s . co m*/ int i; edge_path = new Path(args[0]); output_path = new Path(args[1] + "/pr_output"); vector_path = new Path(args[1] + "/pr_vector"); tempmv_path = new Path(args[1] + "/pr_tmp"); minmax_path = new Path(args[1] + "/pr_minmax"); distr_path = new Path(args[1] + "/pr_distr"); number_nodes = Integer.parseInt(args[2]); nreducers = Integer.parseInt(args[3]); niteration = Integer.parseInt(args[4]); if (args[5].compareTo("makesym") == 0) make_symmetric = 1; else make_symmetric = 0; int cur_iteration = 1; if (args[6].startsWith("cont")) cur_iteration = Integer.parseInt(args[7].substring(4)); local_output_path = args[2] + "_temp"; converge_threshold = ((double) 1.0 / (double) number_nodes) / 10; System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); System.out.println("[PEGASUS] Computing PageRank. Max iteration = " + niteration + ", threshold = " + converge_threshold + ", cur_iteration=" + cur_iteration + "\n"); if (cur_iteration == 1) gen_initial_vector(number_nodes, vector_path); final FileSystem fs = FileSystem.get(vector_path.toUri(), getConf()); // Run pagerank until converges. for (i = cur_iteration; i <= niteration; i++) { JobClient.runJob(configStage1()); RunningJob job = JobClient.runJob(configStage2()); // The counter is newly created per every iteration. Counters c = job.getCounters(); long changed = c.getCounter(PrCounters.CONVERGE_CHECK); System.out.println("Iteration = " + i + ", changed reducer = " + changed); if (changed == 0) { System.out.println("PageRank vector converged. Now preparing to finish..."); fs.delete(vector_path); fs.delete(tempmv_path); fs.rename(output_path, vector_path); break; } // rotate directory fs.delete(vector_path); fs.delete(tempmv_path); fs.rename(output_path, vector_path); } if (i == niteration) { System.out.println("Reached the max iteration. 
Now preparing to finish..."); } // find min/max of pageranks //System.out.println("Finding minimum and maximum pageranks..."); //JobClient.runJob(configStage3()); //FileUtil.fullyDelete( FileSystem.getLocal(getConf()), new Path(local_output_path)); //String new_path = local_output_path + "/" ; //fs.copyToLocalFile(minmax_path, new Path(new_path) ) ; //MinMaxInfo mmi = readMinMax( new_path ); //System.out.println("min = " + mmi.min + ", max = " + mmi.max ); // find distribution of pageranks //JobClient.runJob(configStage4(mmi.min, mmi.max)); System.out.println("\n[PEGASUS] PageRank computed."); System.out.println("[PEGASUS] The final PageRanks are in the HDFS pr_vector."); //System.out.println("[PEGASUS] The minium and maximum PageRanks are in the HDFS pr_minmax."); //System.out.println("[PEGASUS] The histogram of PageRanks in 1000 bins between min_PageRank and max_PageRank are in the HDFS pr_distr.\n"); return 0; }