Example usage for org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob

List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob.

Prototype

synchronized public String addJob(ControlledJob aJob) 

Document

Add a new job.
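The examples below all follow the same pattern: wrap each configured Job in a ControlledJob, declare dependencies between the ControlledJobs, register them with addJob, and run the JobControl on its own thread (its run() method keeps polling until stop() is called). The following is a minimal sketch of that pattern, not taken from any of the sources below; it assumes two already-configured Job instances named job1 and job2 (with job2 reading job1's output), the usual org.apache.hadoop.mapreduce and org.apache.hadoop.mapreduce.lib.jobcontrol imports, and a surrounding driver method declared with throws Exception.

JobControl jobControl = new JobControl("example job group");

// Wrap each configured Job in a ControlledJob.
ControlledJob controlled1 = new ControlledJob(job1.getConfiguration());
controlled1.setJob(job1);
ControlledJob controlled2 = new ControlledJob(job2.getConfiguration());
controlled2.setJob(job2);

// job2 may only start after job1 completes successfully.
controlled2.addDependingJob(controlled1);

// addJob registers each job with this control group and returns its generated id.
jobControl.addJob(controlled1);
jobControl.addJob(controlled2);

// JobControl implements Runnable; run it on its own thread and poll until done.
Thread controllerThread = new Thread(jobControl);
controllerThread.start();
while (!jobControl.allFinished()) {
    Thread.sleep(1000);
}
jobControl.stop();

if (!jobControl.getFailedJobList().isEmpty()) {
    System.err.println("Failed jobs: " + jobControl.getFailedJobList());
}

Note that the JobControl thread only exits after stop() is called, which is why each example below either polls allFinished() itself or delegates to a helper such as MapReduceUtils.runJobs.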

Usage

From source file: clustering.inverted_index.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */

    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */

    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);

    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.link_back.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    Driver preDriver = new Driver();
    String[] preArgs = new String[2];
    preArgs[0] = args[0];
    preArgs[1] = pre_output;
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = new String[3];
    step1Args[0] = args[2];
    step1Args[1] = args[1];
    step1Args[2] = step1_output;
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = new String[3];
    args2[0] = pre_output;
    args2[1] = step1_output;
    args2[2] = step2_output;
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.mst.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");

    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */

    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);

    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */

    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);

    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs

    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.simhash.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);

    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file: clustering.tf_idf.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count the number of documents in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);

    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}

From source file: com.laizuozuoba.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file: com.niuwa.hadoop.jobs.sample.JobControlTest.java

License: Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // job 1: word count
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // delete any existing output directory before running
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // job 2
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // delete any existing output directory before running
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // ControlledJob
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // set the dependency: job2 runs only after job1 succeeds
    controlledJob2.addDependingJob(controlledJob1);

    // JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // start the JobControl thread and wait for all jobs to finish
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
        // avoid busy-waiting while polling for completion
        Thread.sleep(1000);
    }
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.MeanJob.java

License: Apache License

public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("mean job");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(MeanJob.class);
    log.info("Mean Job initialized");
    log.warn("Mean job: Processing...Do not terminate/close");
    log.debug("Mean job: Mapping process started");

    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, MeanMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    job.getConfiguration().set("fields.spec", "" + column);
    job.getConfiguration().setInt("n", n);

    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Mean job: Mapping process completed");

    log.debug("Mean job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Mean job: Reducing process completed");
    log.info("Mean Job completed\n");
    return value;
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaJob.java

License: Apache License

public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.setJarByClass(SigmaJob.class);
    log.info("Sigma Job initialized");
    log.warn("Sigma job: Processing...Do not terminate/close");
    log.debug("Sigma job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, LongWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);

    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e2) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e2.toString());
        for (StackTraceElement s : e2.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma job: Mapping process completed");

    log.debug("Sigma job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma job: Reducing process completed");
    log.info("Sigma Job completed\n");
    return value;
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaSqJob.java

License: Apache License

public Double call() throws NectarException {
    // TODO Auto-generated method stub
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(SigmaSqJob.class);
    log.info("Sigma square Job initialized");
    log.warn("Sigma square job: Processing...Do not terminate/close");
    log.debug("Sigma square job: Mapping process started");

    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaSqMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma square job: Mapping process completed");

    log.debug("Sigma square job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        fs = FileSystem.get(job.getConfiguration());
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma square job: Reducing process completed");
    log.info("Sigma square Job completed\n");
    return value;
}