List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob addDependingJob
public synchronized boolean addDependingJob(ControlledJob dependingJob)
From source file:clustering.inverted_index.Driver.java
License:Apache License
/**
 * Runs the two-step inverted-index workflow: a normalizer job over the tf-idf
 * result, followed by an inverted-index job that reads the normalizer's output.
 *
 * @param args {@code tf_idf_result_dir output_dir [decimal_number] [pruning_threshold]}
 * @return 0 if the inverted-index job reports success, 1 otherwise
 * @throws Exception if configuring or running either job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        // Trailing space after "output_dir" keeps it separated from the optional arguments.
        System.err.printf("usage: %s tf_idf_result_dir output_dir "
                + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    // Optional third argument; 4 is used when it is absent.
    if (args.length > 2) {
        conf.setInt("deci.number", Integer.parseInt(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    // Pruning is switched on only when a threshold is supplied as the fourth argument.
    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.parseDouble(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);
    job1.setReducerClass(NormalizerReducer.class);

    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);
    job2.setReducerClass(InvertedIndexReducer.class);

    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    // Step 2 reads normDir, so it must not start before step 1 has finished.
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.link_back.WorkflowDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 4) { System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n", getClass().getSimpleName()); System.exit(1);// w ww . ja v a2 s.c o m } String pre_output = args[3] + "/pre"; String step1_output = args[3] + "/step1"; String step2_output = args[3] + "/final"; Configuration conf = getConf(); conf = initConf(conf); JobControl jobControl = new JobControl("link back jobs"); Driver preDriver = new Driver(); String[] preArgs = new String[2]; preArgs[0] = args[0]; preArgs[1] = pre_output; Job preJob = preDriver.configJob(preArgs); ControlledJob controlledPreJob = new ControlledJob(conf); controlledPreJob.setJob(preJob); jobControl.addJob(controlledPreJob); clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver(); String[] step1Args = new String[3]; step1Args[0] = args[2]; step1Args[1] = args[1]; step1Args[2] = step1_output; Job step1Job = step1Driver.configJob(step1Args); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(step1Job); jobControl.addJob(controlledJob1); clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver(); String[] args2 = new String[3]; args2[0] = pre_output; args2[1] = step1_output; args2[2] = step2_output; Job job2 = driver2.configJob(args2); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledPreJob); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); MapReduceUtils.runJobs(jobControl); return job2.waitForCompletion(true) ? 0 : 1; }
From source file:clustering.mst.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 3) { System.err.printf("usage: %s similarity_result_dir document_count_file output_dir " + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName()); System.exit(1);//from w w w .j ava 2s . c o m } Path step1_OutputDir = new Path(args[2] + "/step1"); Path resultDir = new Path(args[2] + "/result"); URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 3) { conf.setDouble("final.threshold", Double.valueOf(args[3])); } else { conf.setDouble("final.threshold", 0.2d); } if (args.length > 4) { conf.setInt("reduce.task.num", Integer.valueOf(args[4])); } else { conf.setInt("reduce.task.num", 5); } JobControl jobControl = new JobControl("mst jobs"); /* step 1, split and calculate the child msts */ Job childJob = Job.getInstance(conf, "mst child job"); childJob.setJarByClass(Driver.class); childJob.addCacheFile(docCntFile); if (args.length > 5 && args[5].equals("0")) { FileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(KeyValueTextInputFormat.class); } else { SequenceFileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class); } FileOutputFormat.setOutputPath(childJob, step1_OutputDir); childJob.setMapperClass(ChildMapper.class); childJob.setMapOutputKeyClass(DoubleWritable.class); childJob.setMapOutputValueClass(Text.class); childJob.setPartitionerClass(ChildPartitioner.class); childJob.setReducerClass(ChildReducer.class); childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1)); childJob.setOutputKeyClass(DoubleWritable.class); childJob.setOutputValueClass(Text.class); ControlledJob controlledChildJob = new ControlledJob(conf); controlledChildJob.setJob(childJob); jobControl.addJob(controlledChildJob); /* step 2, merge step 1's output and calculate final mst */ Job finalJob = 
Job.getInstance(conf, "mst final job"); finalJob.setJarByClass(FinalReducer.class); finalJob.addCacheFile(docCntFile); FileInputFormat.addInputPath(finalJob, step1_OutputDir); finalJob.setInputFormatClass(KeyValueTextInputFormat.class); finalJob.setMapperClass(FinalMapper.class); finalJob.setMapOutputKeyClass(DoubleWritable.class); finalJob.setMapOutputValueClass(Text.class); finalJob.setReducerClass(FinalReducer.class); finalJob.setOutputKeyClass(IntWritable.class); finalJob.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(finalJob, resultDir); ControlledJob finalControlledJob = new ControlledJob(conf); finalControlledJob.setJob(finalJob); finalControlledJob.addDependingJob(controlledChildJob); jobControl.addJob(finalControlledJob); // run jobs MapReduceUtils.runJobs(jobControl); return finalJob.waitForCompletion(true) ? 0 : 1; }
From source file:clustering.simhash.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n", getClass().getSimpleName()); System.exit(1);//from w w w. java 2 s . co m } Path step1_outputDir = new Path(args[1] + "/step1"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 2) { conf.setInt("simhash.threshold", Integer.valueOf(args[2])); } else { conf.setInt("simhash.threshold", 3); } JobControl jobControl = new JobControl("simhash jobs"); Job job1 = Job.getInstance(conf, "simhash step1 job"); job1.setJarByClass(Driver.class); FileInputFormat.addInputPath(job1, new Path(args[0])); job1.setInputFormatClass(KeyValueTextInputFormat.class); job1.setMapperClass(Step1Mapper.class); job1.setMapOutputKeyClass(LongWritable.class); job1.setMapOutputValueClass(Text.class); job1.setReducerClass(Step1Reducer.class); job1.setOutputKeyClass(IntWritable.class); job1.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job1, step1_outputDir); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(job1); jobControl.addJob(controlledJob1); Job job2 = Job.getInstance(conf, "simhash step2 job"); job2.setJarByClass(Driver.class); FileInputFormat.addInputPath(job2, step1_outputDir); job2.setInputFormatClass(KeyValueTextInputFormat.class); job2.setMapperClass(Step2Mapper.class); job2.setMapOutputKeyClass(IntWritable.class); job2.setMapOutputValueClass(Text.class); job2.setReducerClass(Step2Reducer.class); job2.setOutputKeyClass(IntWritable.class); job2.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result")); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); long starttime = System.currentTimeMillis(); clustering.Utils.MapReduceUtils.runJobs(jobControl); boolean complete = 
job2.waitForCompletion(true); long endtime = System.currentTimeMillis(); System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds"); return complete ? 0 : 1; }
From source file:clustering.tf_idf.WorkflowDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n", getClass().getSimpleName()); System.exit(1);/*from w w w . ja va 2 s. c o m*/ } String docCntDir = args[1] + "/docCnt"; String step1_outputDir = args[1] + "/step1"; String step2_outputDir = args[1] + "/step2"; String step3_outputDir = args[1] + "/result"; Configuration conf = getConf(); conf = initConf(conf); JobControl jobControl = new JobControl("tf-idf jobs"); /* pre step, count documents number in the corpus */ DocCntDriver docCntDriver = new DocCntDriver(); String[] preJobArgs = new String[2]; preJobArgs[0] = args[0]; preJobArgs[1] = docCntDir; Job preJob = docCntDriver.configJob(preJobArgs); ControlledJob controlledPreJob = new ControlledJob(conf); controlledPreJob.setJob(preJob); jobControl.addJob(controlledPreJob); /* step 1, calculate term count of each document */ TermCntDriver termCntDriver = new TermCntDriver(); String[] job1Args = new String[2]; job1Args[0] = args[0]; job1Args[1] = step1_outputDir; Job job1 = termCntDriver.configJob(job1Args); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(job1); jobControl.addJob(controlledJob1); /* step 2, calculate the term frequency of each document */ TermFreqDriver termFreqDriver = new TermFreqDriver(); String gnameWeight = args.length > 2 ? args[2] : "1.0"; conf.setDouble("gname.weight", Double.valueOf(gnameWeight)); String[] job2Args = args.length > 2 ? 
new String[3] : new String[2]; job2Args[0] = step1_outputDir; job2Args[1] = step2_outputDir; if (args.length > 2) { job2Args[2] = args[2]; } Job job2 = termFreqDriver.configJob(job2Args); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); /* step 3, calculate tf_idf */ TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver(); String[] job3Args = new String[3]; job3Args[0] = docCntDir; job3Args[1] = step2_outputDir; job3Args[2] = step3_outputDir; Job job3 = tf_idf_driver.configJob(job3Args); ControlledJob controlledJob3 = new ControlledJob(conf); controlledJob3.setJob(job3); controlledJob3.addDependingJob(controlledJob2); controlledJob3.addDependingJob(controlledPreJob); jobControl.addJob(controlledJob3); // run jobs runJobs(jobControl); return job3.waitForCompletion(true) ? 0 : 1; }
From source file:com.jbw.jobcontrol.Patent.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job1 = Job.getInstance(conf);//from w w w .j a va 2 s. c om job1.setJobName("test"); job1.setJarByClass(Patent.class); ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf); ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class, conf); job1.setReducerClass(IntSumReducer.class); Job job2 = Job.getInstance(); ControlledJob cjob1 = new ControlledJob(job1.getConfiguration()); ControlledJob cjob2 = new ControlledJob(job2.getConfiguration()); cjob2.addDependingJob(cjob1); JobControl jc = new JobControl("process job"); jc.addJob(cjob1); jc.addJob(cjob2); Thread t = new Thread(jc); t.start(); while (true) { for (ControlledJob j : jc.getRunningJobList()) { break; } break; } return 0; }
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0"); Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//from www. j a va2 s .com } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); Job job2 = new Job(conf, "uv"); job2.setJarByClass(WordCount.class); job2.setMapperClass(UVMapper.class); job2.setCombinerClass(UVReducer.class); job2.setReducerClass(UVReducer.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job2, new Path(otherArgs[1])); FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2")); ControlledJob controlledJob = new ControlledJob(job.getConfiguration()); ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration()); controlledJob2.addDependingJob(controlledJob); JobControl jc = new JobControl("123"); jc.addJob(controlledJob); jc.addJob(controlledJob2); Thread jcThread = new Thread(jc); jcThread.start(); while (true) { if (jc.allFinished()) { System.out.println(jc.getSuccessfulJobList()); jc.stop(); break; } if (jc.getFailedJobList().size() > 0) { System.out.println(jc.getFailedJobList()); jc.stop(); break; } Thread.sleep(1000); } System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!"); }
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
/**
 * Runs a "word count" job and a dependent "job2" through JobControl, then
 * prints the successful-job list once the group has finished.
 *
 * @param args optional; {@code args[0]} overrides the default HDFS base path
 * @throws Exception if job setup fails or the wait is interrupted
 */
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }

    // Formatted once so both temp paths share the same date component
    // (previously formatted twice from two separate Date instances).
    String dateDir = DateUtil.format(new Date());
    String[] args_1 = new String[] {
            path + "/chubao/input/contact",
            path + "/chubao/temp/" + dateDir + "/contact_total",
            path + "/chubao/temp/" + dateDir + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // First job: input -> contact_total
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: contact_total -> contact_total_next
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // Wrap both jobs so JobControl can schedule them.
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // The second job must wait for the first.
    controlledJob2.addDependingJob(controlledJob1);

    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // JobControl is a Runnable; run it on its own thread and poll for completion.
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    try {
        while (!jobControl.allFinished()) {
            // Sleep between polls instead of spinning at full CPU.
            Thread.sleep(500);
        }
        System.out.println(jobControl.getSuccessfulJobList());
    } finally {
        jobControl.stop();
    }
}
From source file:main.Driver.java
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub if (args.length != 5) { System.out.println("usage: [input1] [output1] [input2] [output2] [finaloutput]"); System.exit(-1);//from ww w . j a va 2 s . co m } _configuration = this.getConf(); System.out.println(_configuration.get("Hello")); ControlledJob nameJob = setSortingJob(args[0], args[1], LicenseOutputFormat.NAMES); ControlledJob licenseJob = setSortingJob(args[2], args[3], LicenseOutputFormat.LICENSE); ControlledJob mrJob = setMRJob(args[1], args[3], args[4]); mrJob.addDependingJob(nameJob); mrJob.addDependingJob(licenseJob); JobControl jobControl = new JobControl("MyJob"); jobControl.addJob(nameJob); jobControl.addJob(licenseJob); jobControl.addJob(mrJob); //jobControl.run(); Thread thread = new Thread(jobControl); thread.start(); while (!jobControl.allFinished()) { System.out.println("Running"); Thread.sleep(5000); } System.out.println("<<<Done>>>"); return 0; }
From source file:org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java
License:Apache License
public Map<String, ControlledJob> getJobs(String[] args) throws Exception { Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>(); ControlledJob cNormsAndTranspose = null; ControlledJob cPairwiseSimilarity = null; ControlledJob cAsMatrix = null;//from w w w . j av a 2s. co m addInputOption(); addOutputOption(); addOption("numberOfColumns", "r", "Number of columns in the input matrix", false); addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use " + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')'); addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW)); addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?", String.valueOf(false)); addOption("threshold", "tr", "discard row pairs with a similarity value below this", false); addOption(DefaultOptionCreator.overwriteOption().create()); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return null; } String similarityClassnameArg = getOption("similarityClassname"); String similarityClassname; try { similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname(); } catch (IllegalArgumentException iae) { similarityClassname = similarityClassnameArg; } // Clear the output and temp paths if the overwrite option has been set if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { // Clear the temp path HadoopUtil.delete(getConf(), getTempPath()); // Clear the output path HadoopUtil.delete(getConf(), getOutputPath()); } int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow")); boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity")); double threshold = hasOption("threshold") ? 
Double.parseDouble(getOption("threshold")) : NO_THRESHOLD; Path weightsPath = getTempPath("weights"); Path normsPath = getTempPath("norms.bin"); Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin"); Path maxValuesPath = getTempPath("maxValues.bin"); Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity"); AtomicInteger currentPhase = new AtomicInteger(); if (shouldRunNextPhase(parsedArgs, currentPhase)) { Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath, VectorNormMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class); Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration(); normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold)); normsAndTransposeConf.set(NORMS_PATH, normsPath.toString()); normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString()); normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString()); normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname); /** * this job is depending the last job --- countObservations */ cNormsAndTranspose = new ControlledJob(new Configuration()); cNormsAndTranspose.setJob(normsAndTranspose); cJobs.put("normsAndTranspose", cNormsAndTranspose); } if (shouldRunNextPhase(parsedArgs, currentPhase)) { Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class, IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class, VectorWritable.class); pairwiseSimilarity.setCombinerClass(VectorSumReducer.class); Configuration pairwiseConf = pairwiseSimilarity.getConfiguration(); pairwiseConf.set(THRESHOLD, String.valueOf(threshold)); pairwiseConf.set(NORMS_PATH, normsPath.toString()); pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString()); pairwiseConf.set(MAXVALUES_PATH, 
maxValuesPath.toString()); pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname); //add prePath pairwiseConf.set("prepPath", getInputPath().toString()); pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity); /** * depending on normsAndTranspose job */ cPairwiseSimilarity = new ControlledJob(new Configuration()); cPairwiseSimilarity.setJob(pairwiseSimilarity); if (null != cNormsAndTranspose) { cPairwiseSimilarity.addDependingJob(cNormsAndTranspose); } cJobs.put("pairwiseSimilarity", cPairwiseSimilarity); } if (shouldRunNextPhase(parsedArgs, currentPhase)) { Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class, IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class, IntWritable.class, VectorWritable.class); asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class); asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow); /** * depending on pairwiseSimilarity job */ cAsMatrix = new ControlledJob(new Configuration()); cAsMatrix.setJob(asMatrix); if (null != cPairwiseSimilarity) { cAsMatrix.addDependingJob(cPairwiseSimilarity); } cJobs.put("asMatrix", cAsMatrix); } return cJobs; }