List of usage examples for org.apache.hadoop.mapred.JobConf#setNumReduceTasks
public void setNumReduceTasks(int n)
From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java
License:Open Source License
@SuppressWarnings("deprecation") @Override/*ww w. j a v a2 s. com*/ public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Usage:\n"); System.exit(1); } // Job job = new Job(super.getConf()); // READ IN ALL COMMAND LINE ARGUMENTS // EXAMPLE: // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar // -Dmapred.job.map.memory.mb=4096 // -Dmapred.job.reduce.memory.mb=4096 // -Dmapred.child.java.opts=-Xmx3500m // -Dmapreduce.task.timeout=60000000 // -Dmapreduce.job.queuename=QUEUENAME // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1 // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/ int m = -1; // input path to use on hdfs Path inputPath = new Path(args[++m]); // output path to use on hdfs Path outputPath = new Path(args[++m]); // number of Mappers to split the sources: e.g., 1, 10, 100 etc. // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number. int numOfMaps = Integer.parseInt(args[++m]); // number of Reducers to collect the output int numOfReduce = Integer.parseInt(args[++m]); // Number of vertices in graph int N = Integer.parseInt(args[++m]); // Number of edges in graph int M = Integer.parseInt(args[++m]); // Graph file (edge list, tab delimited) (full path) String graph = args[++m]; // File with edges to be added (tab delimited) (full path) // Note: this version handles only edges between existing vertices in the graph. 
String random_edges = args[++m]; // Number of random edges added int re = Integer.parseInt(args[++m]); // Experiment iteration (in case of multiple experiments) int iter = Integer.parseInt(args[++m]); // Use combiner or not (true/false) Boolean comb = Boolean.valueOf(args[++m]); // Output path for file with stats String statsoutputpath = args[++m]; // Output path for file with final betweenness values String betoutputpath = args[++m]; // BEGIN INITIALIZATION JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class); FileSystem fs = FileSystem.get(conf); String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb; conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter); conf.set("HDFS_GRAPH", graph + setup); conf.set("HDFS_Random_Edges", random_edges + setup); conf.set("output", outputPath.getName()); conf.set("setup", setup); // CREATE INPUT FILES FOR MAPPERS int numOfTasksperMap = (int) Math.ceil(N / numOfMaps); //generate an input file for each map task for (int i = 0; i < numOfMaps - 1; i++) { Path file = new Path(inputPath, "part-r-" + i); IntWritable start = new IntWritable(i * numOfTasksperMap); IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end); } // last mapper takes what is left Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1)); IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap); IntWritable end = new IntWritable(N - 1); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(start, end); } finally { writer.close(); } System.out.println("Wrote input for Map #" 
+ (numOfMaps - 1) + ": " + start + " - " + end); // COPY FILES TO MAPPERS System.out.println("Copying graph to cache"); String LOCAL_GRAPH = graph; Path hdfsPath = new Path(graph + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); System.out.println("Copying random edges to cache"); String LOCAL_Random_Edges = random_edges; hdfsPath = new Path(random_edges + setup); // upload the file to hdfs. Overwrite any existing copy. fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath); DistributedCache.addCacheFile(hdfsPath.toUri(), conf); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(IterBrandesMapper.class); conf.setNumMapTasks(numOfMaps); if (comb) conf.setCombinerClass(IterBrandesReducer.class); conf.setReducerClass(IterBrandesReducer.class); conf.setNumReduceTasks(numOfReduce); // turn off speculative execution, because DFS doesn't handle multiple writers to the same file. conf.setSpeculativeExecution(false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // conf.set("mapred.job.name", "APS-" + outputPath.getName()); conf.setNumTasksToExecutePerJvm(-1); // JVM reuse System.out.println("Starting the execution...! Pray!! \n"); long time1 = System.nanoTime(); RunningJob rj = JobClient.runJob(conf); long time2 = System.nanoTime(); // READ OUTPUT FILES System.out.println("\nFinished and now reading/writing Betweenness Output...\n"); // Assuming 1 reducer. 
Path inFile = new Path(outputPath, "part-00000"); IntWritable id = new IntWritable(); DoubleWritable betweenness = new DoubleWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter)); try { int i = 0; for (; i < (N + (M - re)); i++) { reader.next(id, betweenness); fw.write(id + "\t" + betweenness + "\n"); fw.flush(); } } finally { reader.close(); fw.close(); } System.out.println("\nWriting times Output...\n"); fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter)); fw.write("Total-time:\t" + (time2 - time1) + "\n"); fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_MAPS") + "\n"); fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("SLOTS_MILLIS_REDUCES") + "\n"); fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("CPU_MILLISECONDS") + "\n"); fw.write("total-gc-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS") + "\n"); fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("PHYSICAL_MEMORY_BYTES") + "\n"); fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter") .getCounter("VIRTUAL_MEMORY_BYTES") + "\n"); fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes") + "\n"); fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n"); fw.flush(); try { Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator(); while (counters.hasNext()) { Counter cc = counters.next(); fw.write(cc.getName() + "\t" + cc.getCounter() + "\n"); fw.flush(); } } finally { 
fw.close(); } return 0; }
From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java
License:Apache License
/**
 * Configures and synchronously runs the "CCBlock_join" job.
 *
 * <p>{@code Utility.deleteIfExists} is invoked on {@code tempVectorPath} first;
 * the job then reads {@code edgePath} and {@code curVectorPath} and writes to
 * {@code tempVectorPath}.
 *
 * @param blockWidth        value stored under the BLOCK_WIDTH configuration key
 * @param recurDiagonalMult value stored under the RECURSIVE_DIAG_MULT configuration key
 * @param edgePath          first job input path
 * @param curVectorPath     second job input path
 * @param tempVectorPath    job output path
 * @param numOfReducers     number of reduce tasks
 * @param makeSymmetric     value stored under the MAKE_SYMMETRIC configuration key
 * @param fs                file system handed to {@code Utility.deleteIfExists}
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the clean-up or the job run fails
 */
protected RunningJob join(short blockWidth, int recurDiagonalMult, Path edgePath, Path curVectorPath,
        Path tempVectorPath, int numOfReducers, String makeSymmetric, FileSystem fs) throws Exception {
    Utility.deleteIfExists(fs, tempVectorPath);

    JobConf joinJob = new JobConf(getConf(), ConnectedComponentsBlock.class);
    joinJob.setJobName("CCBlock_join");

    // Parameters consumed by the mapper/reducer.
    joinJob.set(BLOCK_WIDTH, Short.toString(blockWidth));
    joinJob.set(RECURSIVE_DIAG_MULT, Integer.toString(recurDiagonalMult));
    joinJob.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
    joinJob.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
    joinJob.set(SEPARATOR_WITHIN_VALUE, SPACE);
    joinJob.set(MAKE_SYMMETRIC, makeSymmetric);

    // Processing classes and output types.
    joinJob.setMapperClass(JoinMapper.class);
    joinJob.setReducerClass(JoinReducer.class);
    joinJob.setNumReduceTasks(numOfReducers);
    joinJob.setOutputKeyClass(LongWritable.class);
    joinJob.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(joinJob, edgePath, curVectorPath);
    FileOutputFormat.setOutputPath(joinJob, tempVectorPath);

    RunningJob finished = JobClient.runJob(joinJob);
    return finished;
}
From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java
License:Apache License
/**
 * Configures and synchronously runs the job named "CCBlock_reduce".
 *
 * <p>{@code Utility.deleteIfExists} is invoked on {@code nextVectorPath} first;
 * the job then reads {@code tempVectorPath} and writes to {@code nextVectorPath}.
 *
 * @param blockWidth     value stored under the BLOCK_WIDTH configuration key
 * @param tempVectorPath job input path
 * @param nextVectorPath job output path
 * @param numOfReducers  number of reduce tasks
 * @param fs             file system handed to {@code Utility.deleteIfExists}
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the clean-up or the job run fails
 */
protected RunningJob merge(short blockWidth, Path tempVectorPath, Path nextVectorPath, int numOfReducers,
        FileSystem fs) throws Exception {
    Utility.deleteIfExists(fs, nextVectorPath);

    JobConf mergeJob = new JobConf(getConf(), ConnectedComponentsBlock.class);
    mergeJob.setJobName("CCBlock_reduce");

    // Parameters consumed by the mapper/reducer.
    mergeJob.set(BLOCK_WIDTH, Short.toString(blockWidth));
    mergeJob.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

    // Processing classes and output types.
    mergeJob.setMapperClass(MergeMapper.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(numOfReducers);
    mergeJob.setOutputKeyClass(LongWritable.class);
    mergeJob.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(mergeJob, tempVectorPath);
    FileOutputFormat.setOutputPath(mergeJob, nextVectorPath);

    RunningJob finished = JobClient.runJob(mergeJob);
    return finished;
}
From source file:me.tingri.graphs.cc.ConnectedComponents.java
License:Apache License
/**
 * Configures and synchronously runs the "ConnectedComponents_Join" job.
 *
 * <p>{@code Utility.deleteIfExists} is invoked on {@code tempVectorPath} first;
 * the job then reads {@code edgePath} and {@code vecPath} and writes to
 * {@code tempVectorPath}.
 *
 * @param fs             file system handed to {@code Utility.deleteIfExists}
 * @param edgePath       first job input path
 * @param vecPath        second job input path
 * @param tempVectorPath job output path
 * @param makeSymmetric  value stored under the MAKE_SYMMETRIC configuration key
 * @param numOfReducers  number of reduce tasks
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the clean-up or the job run fails
 */
protected RunningJob join(FileSystem fs, Path edgePath, Path vecPath, Path tempVectorPath,
        String makeSymmetric, int numOfReducers) throws Exception {
    Utility.deleteIfExists(fs, tempVectorPath);

    JobConf joinJob = new JobConf(getConf(), ConnectedComponents.class);
    joinJob.setJobName("ConnectedComponents_Join");

    // Parameters consumed by the mapper/reducer.
    joinJob.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
    joinJob.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
    joinJob.set(MAKE_SYMMETRIC, makeSymmetric);

    // Processing classes and output types.
    joinJob.setMapperClass(JoinMapper.class);
    joinJob.setReducerClass(JoinReducer.class);
    joinJob.setNumReduceTasks(numOfReducers);
    joinJob.setOutputKeyClass(LongWritable.class);
    joinJob.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(joinJob, edgePath, vecPath);
    FileOutputFormat.setOutputPath(joinJob, tempVectorPath);

    RunningJob finished = JobClient.runJob(joinJob);
    return finished;
}
From source file:me.tingri.graphs.cc.ConnectedComponents.java
License:Apache License
/**
 * Configures and synchronously runs the "ConnectedComponents_Merge" job.
 *
 * <p>{@code Utility.deleteIfExists} is invoked on {@code nextVectorPath} first;
 * the job then reads {@code tempVectorPath} and writes to {@code nextVectorPath}.
 *
 * @param fs             file system handed to {@code Utility.deleteIfExists}
 * @param tempVectorPath job input path
 * @param nextVectorPath job output path
 * @param numOfReducers  number of reduce tasks
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the clean-up or the job run fails
 */
protected RunningJob merge(FileSystem fs, Path tempVectorPath, Path nextVectorPath, int numOfReducers)
        throws Exception {
    Utility.deleteIfExists(fs, nextVectorPath);

    JobConf mergeJob = new JobConf(getConf(), ConnectedComponents.class);
    mergeJob.setJobName("ConnectedComponents_Merge");

    // Parameter consumed by the mapper/reducer.
    mergeJob.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

    // Processing classes and output types.
    mergeJob.setMapperClass(MergeMapper.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(numOfReducers);
    mergeJob.setOutputKeyClass(LongWritable.class);
    mergeJob.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(mergeJob, tempVectorPath);
    FileOutputFormat.setOutputPath(mergeJob, nextVectorPath);

    RunningJob finished = JobClient.runJob(mergeJob);
    return finished;
}
From source file:net.sf.katta.indexing.IndexerJob.java
License:Apache License
public void startIndexer(String path, String finalDestination, int numOfShards) throws IOException { // create job conf with class pointing into job jar. JobConf jobConf = new JobConf(IndexerJob.class); jobConf.setJobName("indexer"); jobConf.setMapRunnerClass(Indexer.class); // alternative use a text file and a TextInputFormat jobConf.setInputFormat(SequenceFileInputFormat.class); Path input = new Path(path); FileInputFormat.setInputPaths(jobConf, input); // we just set the output path to make hadoop happy. FileOutputFormat.setOutputPath(jobConf, new Path(finalDestination)); // setting the folder where lucene indexes will be copied when finished. jobConf.set("finalDestination", finalDestination); // important to switch spec exec off. // We dont want to have something duplicated. jobConf.setSpeculativeExecution(false); // The num of map tasks is equal to the num of input splits. // The num of input splits by default is equal to the num of hdf blocks // for the input file(s). To get the right num of shards we need to // calculate the best input split size. FileSystem fs = FileSystem.get(input.toUri(), jobConf); FileStatus[] status = fs.globStatus(input); long size = 0; for (FileStatus fileStatus : status) { size += fileStatus.getLen();// w w w.j av a 2 s. co m } long optimalSplisize = size / numOfShards; jobConf.set("mapred.min.split.size", "" + optimalSplisize); // give more mem to lucene tasks. jobConf.set("mapred.child.java.opts", "-Xmx2G"); jobConf.setNumMapTasks(1); jobConf.setNumReduceTasks(0); JobClient.runJob(jobConf); }
From source file:nl.tudelft.graphalytics.mapreducev2.MapReduceJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { FileSystem dfs = FileSystem.get(getConf()); String inPath = inputPath;/*from w ww.j a v a 2s .com*/ while (!isFinished()) { iteration++; // Prepare job configuration JobConf jobConfiguration = new JobConf(this.getConf()); jobConfiguration.setJarByClass(this.getClass()); jobConfiguration.setMapOutputKeyClass(getMapOutputKeyClass()); jobConfiguration.setMapOutputValueClass(getMapOutputValueClass()); jobConfiguration.setMapperClass(getMapperClass()); if (getCombinerClass() != null) jobConfiguration.setCombinerClass(getCombinerClass()); jobConfiguration.setReducerClass(getReducerClass()); jobConfiguration.setOutputKeyClass(getOutputKeyClass()); jobConfiguration.setOutputValueClass(getOutputValueClass()); jobConfiguration.setInputFormat(getInputFormatClass()); jobConfiguration.setOutputFormat(getOutputFormatClass()); if (getNumMappers() != -1) jobConfiguration.setNumMapTasks(getNumMappers()); if (getNumReducers() != -1) jobConfiguration.setNumReduceTasks(getNumReducers()); setConfigurationParameters(jobConfiguration); // Set the input and output paths String outPath = intermediatePath + "/iteration-" + iteration; FileInputFormat.addInputPath(jobConfiguration, new Path(inPath)); FileOutputFormat.setOutputPath(jobConfiguration, new Path(outPath)); // Execute the current iteration RunningJob jobExecution = JobClient.runJob(jobConfiguration); jobExecution.waitForCompletion(); // Remove the output of the previous job (unless it is the input graph) if (iteration != 1) { dfs.delete(new Path(inPath), true); } inPath = outPath; processJobOutput(jobExecution); } // Rename the last job output to the specified output path try { dfs.mkdirs(new Path(outputPath).getParent()); dfs.rename(new Path(inPath), new Path(outputPath)); } catch (Exception e) { LOG.warn("Failed to rename MapReduce job output.", e); } return 0; }
From source file:nlp.com.knowledgebooks.mapreduce.NameFinder.java
License:Open Source License
/** * The main driver for name finder map/reduce program. * <p/>//from w w w . j a v a 2 s.c om * NOTE: copied with modifications from Hadoppjava example programs * <p/> * Invoke this method to submit the map/reduce job. * * @throws IOException When there is communication problems with the * job tracker. */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), NameFinder.class); conf.setJobName("namefinder"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); //conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (Exception ex) { System.err.println("ERROR: " + ex); } } FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p, int reduceTasks, int mis) throws Exception { JobConf job = new JobConf(conf, ABtDenseOutJob.class); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setInt(QJob.PROP_K, k);//w w w .j a v a 2 s . c o m job.setInt(QJob.PROP_P, p); job.set(PROP_BT_PATH, inputBt.toString()); FileOutputFormat.setOutputPath(job, outputPath); job.setJobName("ABtDenseOutJob"); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(LMatrixWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(LMatrixWritable.class); job.setMapperClass(ABtMapper.class); fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job)); mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job)); job.setNumMapTasks(fgather.recNumMapTasks(mis)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, inputPath); RunningJob rj = JobClient.runJob(job); }
From source file:nthu.scopelab.tsqr.ssvd.BtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p, int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis) throws Exception { boolean outputQ = true; String stages[] = reduceSchedule.split(","); JobConf job = new JobConf(conf, BtJob.class); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setInt(SCHEDULE_NUM, stages.length); job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight); job.setInt(QJob.PROP_K, k);// ww w . j ava 2 s.c o m job.setInt(QJob.PROP_P, p); job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ); job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts); job.set(QmultiplyJob.QRF_DIR, qrfPath); FileSystem.get(job).delete(btPath, true); FileOutputFormat.setOutputPath(job, btPath); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setJobName("BtJob"); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(SparseRowBlockWritable.class); job.setOutputKeyClass(IntWritable.class); //job.setOutputValueClass(SparseRowBlockWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(BtMapper.class); job.setCombinerClass(OuterProductCombiner.class); job.setReducerClass(OuterProductReducer.class); fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job)); mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job)); job.setNumMapTasks(fgather.recNumMapTasks(mis)); //job.setNumReduceTasks(0); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, inputPath); if (outputQ) { MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class, IntWritable.class, LMatrixWritable.class); } if (outputBBtProducts) { MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, 
SequenceFileOutputFormat.class, IntWritable.class, VectorWritable.class); } RunningJob rj = JobClient.runJob(job); System.out.println("Btjob Job ID: " + rj.getJobID().toString()); }