Example usage for org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob

List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.jobcontrol JobControl addJob.

Prototype

synchronized public String addJob(ControlledJob aJob) 

Document

Add a new job.
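The examples below all follow the same pattern: wrap each configured Job in a ControlledJob, declare dependencies between the ControlledJobs, register them with addJob, and run the JobControl on its own thread (its run() method keeps polling until stop() is called). The following is a minimal sketch of that pattern, not taken from any of the sources below; it assumes two already-configured Job instances named job1 and job2 (with job2 reading job1's output), the usual org.apache.hadoop.mapreduce and org.apache.hadoop.mapreduce.lib.jobcontrol imports, and a surrounding driver method declared with throws Exception.

JobControl jobControl = new JobControl("example job group");

// Wrap each configured Job in a ControlledJob.
ControlledJob controlled1 = new ControlledJob(job1.getConfiguration());
controlled1.setJob(job1);
ControlledJob controlled2 = new ControlledJob(job2.getConfiguration());
controlled2.setJob(job2);

// job2 may only start after job1 completes successfully.
controlled2.addDependingJob(controlled1);

// addJob registers each job with this control group and returns its generated id.
jobControl.addJob(controlled1);
jobControl.addJob(controlled2);

// JobControl implements Runnable; run it on its own thread and poll until done.
Thread controllerThread = new Thread(jobControl);
controllerThread.start();
while (!jobControl.allFinished()) {
    Thread.sleep(1000);
}
jobControl.stop();

if (!jobControl.getFailedJobList().isEmpty()) {
    System.err.println("Failed jobs: " + jobControl.getFailedJobList());
}

Note that the JobControl thread only exits after stop() is called, which is why each example below either polls allFinished() itself or delegates to a helper such as MapReduceUtils.runJobs.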

Usage

From source file: clustering.inverted_index.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */

    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */

    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);

    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.link_back.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    Driver preDriver = new Driver();
    String[] preArgs = new String[2];
    preArgs[0] = args[0];
    preArgs[1] = pre_output;
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = new String[3];
    step1Args[0] = args[2];
    step1Args[1] = args[1];
    step1Args[2] = step1_output;
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = new String[3];
    args2[0] = pre_output;
    args2[1] = step1_output;
    args2[2] = step2_output;
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.mst.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");

    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */

    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);

    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */

    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);

    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs

    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}

From source file: clustering.simhash.Driver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);

    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}

From source file: clustering.tf_idf.WorkflowDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir " + "[gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count the number of documents in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;

    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);

    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}

From source file: com.laizuozuoba.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file: com.niuwa.hadoop.jobs.sample.JobControlTest.java

License: Apache License

public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();
    // job 1: word count
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // delete any existing output directory before running
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // job 2
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // delete any existing output directory before running
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // ControlledJob
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // set the dependency: job2 runs only after job1 succeeds
    controlledJob2.addDependingJob(controlledJob1);

    // JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // start the JobControl thread and wait for all jobs to finish
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
        // avoid busy-waiting while polling for completion
        Thread.sleep(1000);
    }
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.MeanJob.java

License: Apache License

public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("mean job");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(MeanJob.class);
    log.info("Mean Job initialized");
    log.warn("Mean job: Processing...Do not terminate/close");
    log.debug("Mean job: Mapping process started");

    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, MeanMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    job.getConfiguration().set("fields.spec", "" + column);
    job.getConfiguration().setInt("n", n);

    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Mean job: Mapping process completed");

    log.debug("Mean job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Mean job: Reducing process completed");
    log.info("Mean Job completed\n");
    return value;
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaJob.java

License: Apache License

public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.setJarByClass(SigmaJob.class);
    log.info("Sigma Job initialized");
    log.warn("Sigma job: Processing...Do not terminate/close");
    log.debug("Sigma job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, LongWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);

    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e2) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e2.toString());
        for (StackTraceElement s : e2.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma job: Mapping process completed");

    log.debug("Sigma job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma job: Reducing process completed");
    log.info("Sigma Job completed\n");
    return value;
}

From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaSqJob.java

License: Apache License

public Double call() throws NectarException {
    // TODO Auto-generated method stub
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(SigmaSqJob.class);
    log.info("Sigma square Job initialized");
    log.warn("Sigma square job: Processing...Do not terminate/close");
    log.debug("Sigma square job: Mapping process started");

    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class, Text.class,
                NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaSqMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma square job: Mapping process completed");

    log.debug("Sigma square job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        fs = FileSystem.get(job.getConfiguration());
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma square job: Reducing process completed");
    log.info("Sigma square Job completed\n");
    return value;
}