Example usage for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob addDependingJob

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob#addDependingJob.

Prototype

public synchronized boolean addDependingJob(ControlledJob dependingJob) 

Document

Add a job to this job's dependency list.
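
Every example on this page follows the same pattern: wrap each Job in a ControlledJob, declare the execution order with addDependingJob, register the wrappers with a JobControl, and run the JobControl on its own thread until all jobs have finished. The minimal sketch below condenses that pattern; the class name, job names, and input/output paths are placeholders for illustration (the jobs rely on the default identity mapper and reducer), not code taken from any of the source files listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DependentJobsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Step 1: reads the raw input and writes an intermediate directory.
        Job job1 = Job.getInstance(conf, "step 1");
        job1.setJarByClass(DependentJobsSketch.class);
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1] + "/step1"));
        // ... set mapper, reducer and output key/value classes for job1 here ...

        // Step 2: consumes the intermediate directory produced by job1.
        Job job2 = Job.getInstance(conf, "step 2");
        job2.setJarByClass(DependentJobsSketch.class);
        FileInputFormat.addInputPath(job2, new Path(args[1] + "/step1"));
        FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));
        // ... set mapper, reducer and output key/value classes for job2 here ...

        // Wrap both jobs and declare that job2 must wait for job1.
        ControlledJob controlledJob1 = new ControlledJob(conf);
        controlledJob1.setJob(job1);
        ControlledJob controlledJob2 = new ControlledJob(conf);
        controlledJob2.setJob(job2);
        controlledJob2.addDependingJob(controlledJob1);

        // JobControl runs the dependency graph; it implements Runnable,
        // so start it on its own thread and poll until everything is done.
        JobControl jobControl = new JobControl("dependent jobs");
        jobControl.addJob(controlledJob1);
        jobControl.addJob(controlledJob2);

        Thread thread = new Thread(jobControl);
        thread.start();
        while (!jobControl.allFinished()) {
            Thread.sleep(1000);
        }
        jobControl.stop();

        System.exit(jobControl.getFailedJobList().isEmpty() ? 0 : 1);
    }
}

Note that addDependingJob returns a boolean: the dependency is only accepted while the job is still in the waiting state, and false is returned otherwise.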

Usage

From source file: clustering.inverted_index.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);/*from  w w w .  j  a va  2  s.  c  om*/
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */

    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */

    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);

    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.link_back.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    Driver preDriver = new Driver();
    String[] preArgs = new String[2];
    preArgs[0] = args[0];
    preArgs[1] = pre_output;
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = new String[3];
    step1Args[0] = args[2];
    step1Args[1] = args[1];
    step1Args[2] = step1_output;
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = new String[3];
    args2[0] = pre_output;
    args2[1] = step1_output;
    args2[2] = step2_output;
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.mst.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");

    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */

    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);

    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */

    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);

    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs

    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.simhash.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);

    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file: clustering.tf_idf.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count documents number in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);

    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}

From source file: com.jbw.jobcontrol.Patent.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);

    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class, conf);

    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance();
    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);
    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);
    Thread t = new Thread(jc);
    t.start();
    // wait for the controlled jobs to finish before returning
    while (!jc.allFinished()) {
        Thread.sleep(1000);
    }
    jc.stop();
    return 0;
}

From source file: com.laizuozuoba.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file: com.niuwa.hadoop.jobs.sample.JobControlTest.java

License: Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // first job: word count
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // delete the previous output directory, if it exists
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // second job
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // delete the previous output directory, if it exists
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // wrap both jobs in ControlledJob instances
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // declare that job2 depends on job1
    controlledJob2.addDependingJob(controlledJob1);

    // register both jobs with a JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // run the JobControl on its own thread and wait for all jobs to finish
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
        Thread.sleep(1000);
    }
}

From source file: main.Driver.java

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub
    if (args.length != 5) {
        System.out.println("usage: [input1] [output1] [input2] [output2] [finaloutput]");
        System.exit(-1);
    }

    _configuration = this.getConf();
    System.out.println(_configuration.get("Hello"));
    ControlledJob nameJob = setSortingJob(args[0], args[1], LicenseOutputFormat.NAMES);
    ControlledJob licenseJob = setSortingJob(args[2], args[3], LicenseOutputFormat.LICENSE);
    ControlledJob mrJob = setMRJob(args[1], args[3], args[4]);
    mrJob.addDependingJob(nameJob);
    mrJob.addDependingJob(licenseJob);
    JobControl jobControl = new JobControl("MyJob");
    jobControl.addJob(nameJob);
    jobControl.addJob(licenseJob);
    jobControl.addJob(mrJob);

    //jobControl.run();
    Thread thread = new Thread(jobControl);
    thread.start();

    while (!jobControl.allFinished()) {
        System.out.println("Running");
        Thread.sleep(5000);
    }
    System.out.println("<<<Done>>>");
    return 0;
}

From source file: org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java

License: Apache License

public Map<String, ControlledJob> getJobs(String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>();
    ControlledJob cNormsAndTranspose = null;
    ControlledJob cPairwiseSimilarity = null;
    ControlledJob cAsMatrix = null;

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return null;
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // Clear the temp path
        HadoopUtil.delete(getConf(), getTempPath());
        // Clear the output path
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath,
                VectorNormMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
                IntWritable.class, VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        /**
         * this job depends on the previous job, countObservations
         */
        cNormsAndTranspose = new ControlledJob(new Configuration());
        cNormsAndTranspose.setJob(normsAndTranspose);
        cJobs.put("normsAndTranspose", cNormsAndTranspose);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class,
                VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        //add prePath
        pairwiseConf.set("prepPath", getInputPath().toString());
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        /**
         * depends on the normsAndTranspose job
         */
        cPairwiseSimilarity = new ControlledJob(new Configuration());
        cPairwiseSimilarity.setJob(pairwiseSimilarity);
        if (null != cNormsAndTranspose) {
            cPairwiseSimilarity.addDependingJob(cNormsAndTranspose);
        }
        cJobs.put("pairwiseSimilarity", cPairwiseSimilarity);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        /**
         * depends on the pairwiseSimilarity job
         */
        cAsMatrix = new ControlledJob(new Configuration());
        cAsMatrix.setJob(asMatrix);
        if (null != cPairwiseSimilarity) {
            cAsMatrix.addDependingJob(cPairwiseSimilarity);
        }
        cJobs.put("asMatrix", cAsMatrix);
    }

    return cJobs;
}