List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob setJob
public synchronized void setJob(Job job)
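The examples below all follow the same shape: configure a plain Job, attach it to a ControlledJob with setJob(), declare ordering with addDependingJob(), and hand everything to a JobControl. A minimal, self-contained sketch of that pattern (the class name, job names, and polling interval here are illustrative placeholders, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class SetJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job first = Job.getInstance(conf, "first job");
        // ... set mapper/reducer/input/output on `first` here ...
        ControlledJob controlledFirst = new ControlledJob(conf);
        controlledFirst.setJob(first); // attach the configured Job to its ControlledJob wrapper

        Job second = Job.getInstance(conf, "second job");
        // ... configure `second`, typically reading the output of `first` ...
        ControlledJob controlledSecond = new ControlledJob(conf);
        controlledSecond.setJob(second);
        controlledSecond.addDependingJob(controlledFirst); // scheduled only after `first` succeeds

        JobControl jobControl = new JobControl("example jobs");
        jobControl.addJob(controlledFirst);
        jobControl.addJob(controlledSecond);

        new Thread(jobControl).start(); // JobControl implements Runnable
        while (!jobControl.allFinished()) {
            Thread.sleep(5000); // poll until every job completes or fails
        }
        jobControl.stop(); // terminate the scheduling thread
    }
}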
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir "
                + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }
    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);
    job1.setMapperClass(Mapper.class); // identity mapper
    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(Mapper.class); // identity mapper
    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);
    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.link_back.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    /* pre job */
    Driver preDriver = new Driver();
    String[] preArgs = {args[0], pre_output};
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1 job */
    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = {args[2], args[1], step1_output};
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    /* step 2 job, depends on both the pre job and the step 1 job */
    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = {pre_output, step1_output, step2_output};
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);
    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.mst.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");
    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */
    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);
    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }
    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);
    childJob.setPartitionerClass(ChildPartitioner.class);
    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate the final mst */
    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);
    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);
    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);
    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs
    MapReduceUtils.runJobs(jobControl);
    return finalJob.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.simhash.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);
    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);
    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");
    return complete ? 0 : 1;
}
From source file:clustering.tf_idf.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir [gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count the number of documents in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;
    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate the term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();
    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));
    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf; depends on step 2 and the pre step */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);
    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);
    return job3.waitForCompletion(true) ? 0 : 1;
}
From source file:org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java
License:Apache License
public Map<String, ControlledJob> getJobs(String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>();
    ControlledJob cNormsAndTranspose = null;
    ControlledJob cPairwiseSimilarity = null;
    ControlledJob cAsMatrix = null;

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return null;
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getTempPath());   // clear the temp path
        HadoopUtil.delete(getConf(), getOutputPath()); // clear the output path
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath,
                VectorNormMapper.class, IntWritable.class, VectorWritable.class,
                MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        /* this job depends on the previous job: countObservations */
        cNormsAndTranspose = new ControlledJob(new Configuration());
        cNormsAndTranspose.setJob(normsAndTranspose);
        cJobs.put("normsAndTranspose", cNormsAndTranspose);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath,
                CooccurrencesMapper.class, IntWritable.class, VectorWritable.class,
                SimilarityReducer.class, IntWritable.class, VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        pairwiseConf.set("prepPath", getInputPath().toString()); // add the preparation path
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        /* depends on the normsAndTranspose job */
        cPairwiseSimilarity = new ControlledJob(new Configuration());
        cPairwiseSimilarity.setJob(pairwiseSimilarity);
        if (null != cNormsAndTranspose) {
            cPairwiseSimilarity.addDependingJob(cNormsAndTranspose);
        }
        cJobs.put("pairwiseSimilarity", cPairwiseSimilarity);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(),
                UnsymmetrifyMapper.class, IntWritable.class, VectorWritable.class,
                MergeToTopKSimilaritiesReducer.class, IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        /* depends on the pairwiseSimilarity job */
        cAsMatrix = new ControlledJob(new Configuration());
        cAsMatrix.setJob(asMatrix);
        if (null != cPairwiseSimilarity) {
            cAsMatrix.addDependingJob(cPairwiseSimilarity);
        }
        cJobs.put("asMatrix", cAsMatrix);
    }
    return cJobs;
}
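Unlike the other examples, getJobs() only builds and returns the ControlledJobs; nothing here runs them. A hedged sketch of how a caller might drive the returned map with a JobControl (the helper name runRowSimilarity and its signature are assumptions for illustration, not from the source):

// Hypothetical driver code, not part of the source: runs whatever jobs getJobs() built.
static boolean runRowSimilarity(RowSimilarityJob job, String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = job.getJobs(args);
    if (cJobs == null) {
        return false; // argument parsing failed
    }
    JobControl control = new JobControl("row similarity jobs");
    for (ControlledJob cJob : cJobs.values()) {
        control.addJob(cJob); // inter-job dependencies were already declared via addDependingJob()
    }
    new Thread(control).start();
    while (!control.allFinished()) {
        Thread.sleep(5000);
    }
    boolean succeeded = control.getFailedJobList().isEmpty();
    control.stop();
    return succeeded;
}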
From source file:tv.icntv.grade.film.dbcollect.TableConcurrencyJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    String[] tables = configuration.get("hbase.cdn.tables").split(",");
    JobControl jobControl = new JobControl("data init");
    for (String table : tables) {
        // job 1: export the HBase table
        String hbaseDbDirectory = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);
        HadoopUtils.deleteIfExist(hbaseDbDirectory);
        Job tableJob = new Job(configuration, "icntv grade init " + table);
        TableMapReduceUtil.initTableMapperJob(table, new Scan(), TableInitMapper.class,
                Text.class, Text.class, tableJob);
        MapReduceUtils.initReducerJob(new Path(hbaseDbDirectory), TableInitReducer.class, tableJob);

        ControlledJob tableControlledJob = new ControlledJob(configuration);
        tableControlledJob.setJob(tableJob);

        // job 2: db collect, depending on job 1
        String dbDirectory = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        HadoopUtils.deleteIfExist(dbDirectory);
        Configuration conf = getConf();
        Job db = new Job(conf, "icntv db collect " + table);
        conf.setLong("mapred.min.split.size", 512 * 2014 * 1024L); // "2014" looks like a typo for 1024; see the note after this example
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class,
                this.getClass(), db, new Path(strings[1]));
        FileOutputFormat.setOutputPath(db, new Path(dbDirectory));
        db.setNumReduceTasks(0);

        ControlledJob dbControlledJob = new ControlledJob(conf);
        dbControlledJob.setJob(db);
        dbControlledJob.addDependingJob(tableControlledJob);

        jobControl.addJob(tableControlledJob);
        jobControl.addJob(dbControlledJob);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return 0;
}
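A caveat on the mapred.min.split.size line above (the next example has the same issue): new Job(conf, ...) takes a private copy of the Configuration, so setting the property on conf after the job is constructed may never reach the job. A minimal sketch of the two orderings that do take effect; the 1024 value is an assumption, correcting what looks like a 2014/1024 transposition in the source:

// Hypothetical fix, not from the source: make the split size visible to the job.
conf.setLong("mapred.min.split.size", 512 * 1024 * 1024L); // set BEFORE new Job(conf, ...) copies conf
Job db = new Job(conf, "icntv db collect " + table);
// ...or set it on the job's own copy after construction:
db.getConfiguration().setLong("mapred.min.split.size", 512 * 1024 * 1024L);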
From source file:tv.icntv.grade.film.dbcollect.TableInitJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    JobControl jobControl = new JobControl("init data");
    for (String table : strings) {
        String dbPath = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        String hbasePath = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);

        // table job
        Job tableJob = new Job(configuration, "icntv grade init");
        Scan scan = new Scan();
        HadoopUtils.deleteIfExist(hbasePath);
        HadoopUtils.deleteIfExist(dbPath);
        TableMapReduceUtil.initTableMapperJob(table, scan, TableInitMapper.class,
                Text.class, Text.class, tableJob);
        MapReduceUtils.initReducerJob(new Path(hbasePath), TableInitReducer.class, tableJob);

        ControlledJob firstControll = new ControlledJob(configuration);
        firstControll.setJob(tableJob);

        // db collect job, depending on the table job
        Job db = new Job(configuration, "icntv db collect");
        configuration.setLong("mapred.min.split.size", 512 * 2014 * 1024L); // same likely 2014/1024 typo and conf-copy caveat as above
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class,
                this.getClass(), db, new Path(hbasePath));
        FileOutputFormat.setOutputPath(db, new Path(dbPath));
        db.setNumReduceTasks(0);

        ControlledJob secondaryController = new ControlledJob(configuration);
        secondaryController.setJob(db);
        secondaryController.addDependingJob(firstControll);

        jobControl.addJob(firstControll);
        jobControl.addJob(secondaryController);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    logger.info("job control succeeded, successful job size=" + jobControl.getSuccessfulJobList().size());
    return 0;
}
From source file:tv.icntv.grade.film.grade.GradeJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = this.getConf();

    // num job: count film seeds
    Job numJob = new Job(configuration, "calculate film seed num job");
    Path[] paths = getPaths(strings[0].split(","));
    MapReduceUtils.initMapperJob(NumCountMapper.class, Text.class, LongWritable.class,
            this.getClass(), numJob, paths);
    // TableMapReduceUtil.initTableReducerJob(strings[1], NumCountReducer.class, numJob);

    ControlledJob controlledJob3 = new ControlledJob(configuration);
    controlledJob3.setJob(numJob);

    // time job: calculate the film time median
    Job timeJob = new Job(configuration, "calculate film time middle job");
    MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, paths);
    timeJob.setCombinerClass(TimeCombiner.class);
    MapReduceUtils.initReducerJob(new Path(strings[2]), TimeReducer.class, timeJob);

    ControlledJob controlledJob4 = new ControlledJob(configuration);
    controlledJob4.setJob(timeJob);

    // correlation job over user history
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class,
            this.getClass(), correlate, paths);
    MapReduceUtils.initReducerJob(new Path(strings[3]), UserHistoryReducer.class, correlate);

    ControlledJob correlateController = new ControlledJob(configuration);
    correlateController.setJob(correlate);

    // the three jobs are independent, so no addDependingJob() calls here
    JobControl jobControl = new JobControl("unit grade");
    jobControl.addJob(controlledJob3);
    jobControl.addJob(controlledJob4);
    jobControl.addJob(correlateController);

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    // Counter counter = correlate.getCounters().findCounter(groupName, countName);
    // HadoopUtils.save(new Path(configuration.get("icntv.temp.file")), counter.getValue());
    return 0;
}
From source file:tv.icntv.log.stb.GenerateStbLogJob.java
License:Apache License
@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();

    // user login job
    Job userLogin = Job.getInstance(configuration, "user login job");
    userLogin.setMapperClass(ParserLoginMapper.class);
    userLogin.setJarByClass(this.getClass());
    userLogin.setOutputKeyClass(NullWritable.class);
    userLogin.setOutputValueClass(Text.class);
    Path userLoginOutput = new Path(maps.get(USER_LOGIN_JOB_OUTPUT));
    FileInputFormat.addInputPath(userLogin, new Path(maps.get(USER_LOGIN_JOB_INPUT)));
    FileOutputFormat.setOutputPath(userLogin, userLoginOutput);
    userLogin.setNumReduceTasks(0);
    ControlledJob userControlledJob = new ControlledJob(configuration);
    userControlledJob.setJob(userLogin);

    // player job
    Job player = Job.getInstance(configuration, "player job");
    player.setJarByClass(getClass());
    player.setMapperClass(PlayerMapper.class);
    player.setOutputValueClass(Text.class);
    player.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(player, new Path(maps.get(PLAYER_JOB_INPUT)));
    FileOutputFormat.setOutputPath(player, new Path(maps.get(PLAYER_JOB_OUTPUT)));
    player.setNumReduceTasks(0);
    ControlledJob playControlledJob = new ControlledJob(configuration);
    playControlledJob.setJob(player);

    // content view job
    Job contentView = Job.getInstance(configuration, "content view job");
    contentView.setJarByClass(getClass());
    contentView.setMapperClass(ContentViewMapperBack.class);
    contentView.setOutputKeyClass(NullWritable.class);
    contentView.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(contentView, new Path(maps.get(CONTENT_VIEW_JOB_INPUT)));
    FileOutputFormat.setOutputPath(contentView, new Path(maps.get(CONTENT_VIEW_JOB_OUTPUT)));
    contentView.setNumReduceTasks(0);
    ControlledJob contentViewControlledJob = new ControlledJob(configuration);
    contentViewControlledJob.setJob(contentView);

    // replay (look back) job
    Job replay = Job.getInstance(configuration, "reply job");
    replay.setJarByClass(getClass());
    replay.setMapperClass(ReplayMapper.class);
    replay.setOutputKeyClass(NullWritable.class);
    replay.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(replay, new Path(maps.get(LOOK_BACK_JOB_INPUT)));
    FileOutputFormat.setOutputPath(replay, new Path(maps.get(LOOK_BACK_JOB_OUTPUT)));
    replay.setNumReduceTasks(0);
    ControlledJob replayControlledJob = new ControlledJob(configuration);
    replayControlledJob.setJob(replay);

    // EPG log job
    Job logEpg = Job.getInstance(configuration, "log epg job");
    logEpg.setJarByClass(getClass());
    logEpg.setMapperClass(EPGOperateMapper.class);
    logEpg.setOutputKeyClass(NullWritable.class);
    logEpg.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(logEpg, new Path(maps.get(LOG_EPG_JOB_INPUT)));
    FileOutputFormat.setOutputPath(logEpg, new Path(maps.get(LOG_EPG_JOB_OUTPUT)));
    logEpg.setNumReduceTasks(0);
    ControlledJob logEpgControlledJob = new ControlledJob(configuration);
    logEpgControlledJob.setJob(logEpg);

    // cdn stb job
    Job cdn = Job.getInstance(configuration, "cdn stb job");
    cdn.setJarByClass(this.getClass());
    cdn.setMapperClass(CdnStbMapper.class);
    cdn.setOutputValueClass(Text.class);
    cdn.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(cdn, new Path(maps.get(CDN_JOB_INPUT)));
    FileOutputFormat.setOutputPath(cdn, new Path(maps.get(CDN_JOB_OUTPUT)));
    cdn.setNumReduceTasks(0);
    ControlledJob cdnControlledJob = new ControlledJob(configuration);
    cdnControlledJob.setJob(cdn);

    // cdn adapter job
    Job cdnAdapterJob = Job.getInstance(configuration, "cdn adapter job");
    cdnAdapterJob.setJarByClass(getClass());
    cdnAdapterJob.setMapperClass(CdnAdapterMapper.class);
    cdnAdapterJob.setOutputKeyClass(NullWritable.class);
    cdnAdapterJob.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(cdnAdapterJob, new Path(maps.get(CDN_ADAPTER_JOB_INPUT)));
    FileOutputFormat.setOutputPath(cdnAdapterJob, new Path(maps.get(CDN_ADAPTER_JOB_OUTPUT)));
    cdnAdapterJob.setNumReduceTasks(0);
    ControlledJob cdnAdapterControlleredJob = new ControlledJob(configuration);
    cdnAdapterControlleredJob.setJob(cdnAdapterJob);

    // all seven jobs are independent; the JobControl simply runs them as one batch
    JobControl jobControl = new JobControl("stb log parser, e.g. userLogin, devicePlayer, contentView, logEpg, cdn");
    jobControl.addJob(userControlledJob);
    jobControl.addJob(playControlledJob);
    jobControl.addJob(contentViewControlledJob);
    jobControl.addJob(replayControlledJob);
    jobControl.addJob(logEpgControlledJob);
    jobControl.addJob(cdnControlledJob);
    jobControl.addJob(cdnAdapterControlleredJob);

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return jobControl.getFailedJobList().isEmpty();
}