List of usage examples for the org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl constructor
public JobControl(String groupName)
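The examples below all follow the same pattern: wrap each configured Job in a ControlledJob, declare the dependencies between them, add everything to a JobControl created with a group name, and drive the group from a worker thread. Here is a minimal sketch of that pattern, assuming two already-configured jobs; the names jobA, jobB, runPipeline, and the group name "example group" are placeholders and do not come from any of the source files listed below.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class JobControlSketch {

    // Runs jobB after jobA by chaining them through a JobControl group.
    static void runPipeline(Job jobA, Job jobB) throws Exception {
        ControlledJob first = new ControlledJob(jobA.getConfiguration());
        first.setJob(jobA);

        ControlledJob second = new ControlledJob(jobB.getConfiguration());
        second.setJob(jobB);
        second.addDependingJob(first); // jobB starts only after jobA succeeds

        JobControl control = new JobControl("example group");
        control.addJob(first);
        control.addJob(second);

        // JobControl implements Runnable, so it is driven from its own thread
        // while the caller polls allFinished().
        Thread driver = new Thread(control);
        driver.start();
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        control.stop();
    }
}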
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir "
                + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Mapper.class);

    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Mapper.class);

    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.link_back.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    Driver preDriver = new Driver();
    String[] preArgs = new String[2];
    preArgs[0] = args[0];
    preArgs[1] = pre_output;
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = new String[3];
    step1Args[0] = args[2];
    step1Args[1] = args[1];
    step1Args[2] = step1_output;
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = new String[3];
    args2[0] = pre_output;
    args2[1] = step1_output;
    args2[2] = step2_output;
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.mst.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");
    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */
    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);
    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);

    childJob.setPartitionerClass(ChildPartitioner.class);

    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */
    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);
    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);

    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);

    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs
    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.simhash.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);

    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);

    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);
    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:clustering.tf_idf.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir [gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count documents number in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;
    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();

    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));

    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);
    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);

    return job3.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.crunch.impl.mr.exec.MRExecutor.java
License:Open Source License
public MRExecutor(Class<?> jarClass) {
    this.control = new JobControl(jarClass.toString());
}
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);

    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}
From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java
License:Apache License
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total",
            path + "/chubao/temp/" + DateUtil.format(new Date()) + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // first job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // second job
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    // deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // wrap each job in a ControlledJob
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    // job2 depends on job1
    controlledJob2.addDependingJob(controlledJob1);

    // group the controlled jobs under one JobControl
    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // start the JobControl thread and wait for all jobs to finish
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();
    while (true) {
        if (jobControl.allFinished()) {
            System.out.println(jobControl.getSuccessfulJobList());
            jobControl.stop();
            break;
        }
    }
}
From source file:com.zinnia.nectar.regression.hadoop.primitive.jobs.MeanJob.java
License:Apache License
public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("mean job");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(MeanJob.class);
    log.info("Mean Job initialized");
    log.warn("Mean job: Processing...Do not terminate/close");
    log.debug("Mean job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class,
                Text.class, NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, MeanMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.getConfiguration().setInt("n", n);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Mean job: Mapping process completed");

    log.debug("Mean job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);

    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();

    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Mean Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Mean job: Reducing process completed");
    log.info("Mean Job completed\n");
    return value;
}
From source file:com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaJob.java
License:Apache License
public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.setJarByClass(SigmaJob.class);
    log.info("Sigma Job initialized");
    log.warn("Sigma job: Processing...Do not terminate/close");
    log.debug("Sigma job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, LongWritable.class,
                Text.class, NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e2) {
        String trace = new String();
        log.error(e2.toString());
        for (StackTraceElement s : e2.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma job: Mapping process completed");

    log.debug("Sigma job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);

    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma job: Reducing process completed");
    log.info("Sigma Job completed\n");
    return value;
}