Example usage for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob addDependingJob

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob#addDependingJob.

Prototype

public synchronized boolean addDependingJob(ControlledJob dependingJob) 

Document

Add a job to this job's dependency list.
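
Every example on this page follows the same pattern: wrap each Job in a ControlledJob, declare the execution order with addDependingJob, register the wrappers with a JobControl, and run the JobControl on its own thread until all jobs have finished. The minimal sketch below condenses that pattern; the class name, job names, and input/output paths are placeholders for illustration (the jobs rely on the default identity mapper and reducer), not code taken from any of the source files listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DependentJobsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Step 1: reads the raw input and writes an intermediate directory.
        Job job1 = Job.getInstance(conf, "step 1");
        job1.setJarByClass(DependentJobsSketch.class);
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1] + "/step1"));
        // ... set mapper, reducer and output key/value classes for job1 here ...

        // Step 2: consumes the intermediate directory produced by job1.
        Job job2 = Job.getInstance(conf, "step 2");
        job2.setJarByClass(DependentJobsSketch.class);
        FileInputFormat.addInputPath(job2, new Path(args[1] + "/step1"));
        FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));
        // ... set mapper, reducer and output key/value classes for job2 here ...

        // Wrap both jobs and declare that job2 must wait for job1.
        ControlledJob controlledJob1 = new ControlledJob(conf);
        controlledJob1.setJob(job1);
        ControlledJob controlledJob2 = new ControlledJob(conf);
        controlledJob2.setJob(job2);
        controlledJob2.addDependingJob(controlledJob1);

        // JobControl runs the dependency graph; it implements Runnable,
        // so start it on its own thread and poll until everything is done.
        JobControl jobControl = new JobControl("dependent jobs");
        jobControl.addJob(controlledJob1);
        jobControl.addJob(controlledJob2);

        Thread thread = new Thread(jobControl);
        thread.start();
        while (!jobControl.allFinished()) {
            Thread.sleep(1000);
        }
        jobControl.stop();

        System.exit(jobControl.getFailedJobList().isEmpty() ? 0 : 1);
    }
}

Note that addDependingJob returns a boolean: the dependency is only accepted while the job is still in the waiting state, and false is returned otherwise.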

Usage

From source file: clustering.inverted_index.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);/*from  w w w .  j  a va  2  s.  c  om*/
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */

    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */

    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);

    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.link_back.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    Driver preDriver = new Driver();
    String[] preArgs = new String[2];
    preArgs[0] = args[0];
    preArgs[1] = pre_output;
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = new String[3];
    step1Args[0] = args[2];
    step1Args[1] = args[1];
    step1Args[2] = step1_output;
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = new String[3];
    args2[0] = pre_output;
    args2[1] = step1_output;
    args2[2] = step2_output;
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.mst.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");

    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */

    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);

    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */

    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);

    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs

    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.simhash.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);

    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file: clustering.tf_idf.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count documents number in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);

    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}

From source file: com.jbw.jobcontrol.Patent.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);

    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class, conf);

    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance();
    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);
    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);
    Thread t = new Thread(jc);
    t.start();
    // wait for the controlled jobs to finish before returning
    while (!jc.allFinished()) {
        Thread.sleep(1000);
    }
    jc.stop();
    return 0;
}

From source file: com.laizuozuoba.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file: com.niuwa.hadoop.jobs.sample.JobControlTest.java

License: Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // first job: word count
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // delete the previous output directory, if it exists
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // second job
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // delete the previous output directory, if it exists
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // wrap both jobs in ControlledJob instances
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // declare that job2 depends on job1
    controlledJob2.addDependingJob(controlledJob1);

    // register both jobs with a JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // run the JobControl on its own thread and wait for all jobs to finish
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
        Thread.sleep(1000);
    }
}

From source file: main.Driver.java

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub
    if (args.length != 5) {
        System.out.println("usage: [input1] [output1] [input2] [output2] [finaloutput]");
        System.exit(-1);
    }

    _configuration = this.getConf();
    System.out.println(_configuration.get("Hello"));
    ControlledJob nameJob = setSortingJob(args[0], args[1], LicenseOutputFormat.NAMES);
    ControlledJob licenseJob = setSortingJob(args[2], args[3], LicenseOutputFormat.LICENSE);
    ControlledJob mrJob = setMRJob(args[1], args[3], args[4]);
    mrJob.addDependingJob(nameJob);
    mrJob.addDependingJob(licenseJob);
    JobControl jobControl = new JobControl("MyJob");
    jobControl.addJob(nameJob);
    jobControl.addJob(licenseJob);
    jobControl.addJob(mrJob);

    //jobControl.run();
    Thread thread = new Thread(jobControl);
    thread.start();

    while (!jobControl.allFinished()) {
        System.out.println("Running");
        Thread.sleep(5000);
    }
    System.out.println("<<<Done>>>");
    return 0;
}

From source file: org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java

License: Apache License

public Map<String, ControlledJob> getJobs(String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>();
    ControlledJob cNormsAndTranspose = null;
    ControlledJob cPairwiseSimilarity = null;
    ControlledJob cAsMatrix = null;

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return null;
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // Clear the temp path
        HadoopUtil.delete(getConf(), getTempPath());
        // Clear the output path
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath,
                VectorNormMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
                IntWritable.class, VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        /**
         * this job depends on the previous job, countObservations
         */
        cNormsAndTranspose = new ControlledJob(new Configuration());
        cNormsAndTranspose.setJob(normsAndTranspose);
        cJobs.put("normsAndTranspose", cNormsAndTranspose);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class,
                VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        //add prePath
        pairwiseConf.set("prepPath", getInputPath().toString());
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        /**
         * depends on the normsAndTranspose job
         */
        cPairwiseSimilarity = new ControlledJob(new Configuration());
        cPairwiseSimilarity.setJob(pairwiseSimilarity);
        if (null != cNormsAndTranspose) {
            cPairwiseSimilarity.addDependingJob(cNormsAndTranspose);
        }
        cJobs.put("pairwiseSimilarity", cPairwiseSimilarity);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        /**
         * depends on the pairwiseSimilarity job
         */
        cAsMatrix = new ControlledJob(new Configuration());
        cAsMatrix.setJob(asMatrix);
        if (null != cPairwiseSimilarity) {
            cAsMatrix.addDependingJob(cPairwiseSimilarity);
        }
        cJobs.put("asMatrix", cAsMatrix);
    }

    return cJobs;
}