List of usage examples for the constructor of org.apache.hadoop.mapred.jobcontrol.JobControl
public JobControl(String groupName)
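Before the per-project examples, a minimal, hedged sketch of the constructor in context (the class name, the empty JobConf, and the exit handling are illustrative assumptions, not taken from any of the listed projects): construct the JobControl under a group name, register jobs, run it on a background thread since JobControl implements Runnable, poll allFinished(), then stop() the managing thread.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

public class JobControlSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();                       // hypothetical: set mapper, reducer, and paths here
        JobControl control = new JobControl("example-group");  // groupName labels this batch of jobs
        control.addJob(new Job(jobConf));                      // wrap the JobConf in a jobcontrol Job

        // JobControl implements Runnable, so drive it from its own thread.
        Thread thread = new Thread(control);
        thread.start();
        while (!control.allFinished()) {                       // poll until every job succeeds or fails
            Thread.sleep(1000);
        }
        int exit = control.getFailedJobs().isEmpty() ? 0 : 1;  // a non-empty failed list means failure
        control.stop();                                        // shut down the managing thread
        System.exit(exit);
    }
}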
From source file:adept.pipeline.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobControl jc = new JobControl("Job Control");
    // mapping from class name to the Hadoop Job instance that represents that job
    Map<String, Job> classnamesToJobs = new HashMap<String, Job>();
    if (args.length < 1) {
        System.out.println("Please provide the configuration file path as a command line argument.");
        System.exit(1);
    }
    // read the pipeline configuration file
    BufferedReader br = new BufferedReader(
            new InputStreamReader(Reader.findStreamInClasspathOrFileSystem(args[0])));
    String line;
    // add jobs to job control
    while ((line = br.readLine()) != null) {
        String[] temp = line.split("\t");
        if (temp.length != 4) {
            System.out.println("The pipeline configuration file format is incorrect.");
            System.out.println("The correct format is: ");
            System.out.println("classname dependency-classnames inputfile outputdir");
            System.out.println("Please correct this issue and try again!");
            System.exit(1);
        }
        Job job = new Job(MapReduce.getInstance().getConfiguration(temp[2], temp[3], temp[0]));
        // add dependencies
        if (!temp[1].equals("")) {
            String[] dependencies = temp[1].split(",");
            // TODO: modify this for complex dependencies, e.g. to add a fusion job
            for (String dependency : dependencies) {
                if (classnamesToJobs.get(dependency) == null) {
                    System.out.println("Dependency job not found. Please check your pipeline configuration file. "
                            + "All dependencies need to have been created first.");
                    System.exit(1);
                }
                job.addDependingJob(classnamesToJobs.get(dependency));
            }
        }
        // populate the map
        classnamesToJobs.put(temp[0], job);
        jc.addJob(job);
    }
    handleRun(jc);
}
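For illustration only, a hypothetical pipeline configuration in the tab-separated format the loop above parses. The class names and paths are invented; the second column holds comma-separated dependency class names and is left empty for jobs with none, and the third and fourth columns are assumed (from the getConfiguration call) to be the input file and output directory:

adept.pipeline.Tokenize		input/docs.txt	out/tokens
adept.pipeline.Tag	adept.pipeline.Tokenize	input/docs.txt	out/tags
adept.pipeline.Merge	adept.pipeline.Tokenize,adept.pipeline.Tag	input/docs.txt	out/merged

Each line becomes one jobcontrol Job; addDependingJob() wires it to the previously defined jobs named in its second column.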
From source file:com.ebay.erl.mobius.core.MobiusJobRunner.java
License:Apache License
/**
 * Submit the <code>tool</code> with the specified <code>conf</code> and <code>args</code>.
 * <p>
 * <code>tool</code> can be a {@link MobiusJob} or any instance of {@link org.apache.hadoop.util.Tool}.
 * If <code>tool</code> is an instance of {@link MobiusJob}, it is submitted using
 * {@link JobControl}; otherwise it is submitted directly via
 * <code>ToolRunner.run(conf, tool, args)</code>.
 */
public static int run(Configuration conf, Tool tool, String[] args) throws Exception {
    if (tool instanceof MobiusJob) {
        MobiusJob mobiusJob = (MobiusJob) tool;
        int exit;
        try {
            exit = ToolRunner.run(conf, tool, args);
        } catch (Throwable t) {
            t.printStackTrace();
            exit = 1;
        }
        if (exit == 0) {
            // set up correctly; hand the job topology to a JobControl
            JobControl control = new JobControl("Mobius Job [" + tool.getClass().getCanonicalName() + "]");
            Collection<Job> allJobs = mobiusJob.jobTopology.values();
            control.addJobs(allJobs);
            LOGGER.info(allJobs.size() + " Hadoop job(s) to run.");

            Thread t = new Thread(control);
            t.start();

            StatusCheckingThread statusChecking = new StatusCheckingThread(tool, allJobs, control);
            statusChecking.start();
            statusChecking.join();
            LOGGER.info("All job(s) done.");
            statusChecking.complete();

            int exitCode = control.getFailedJobs().size() == 0 ? 0 : 1;
            mobiusJob.deleteTempFiles();
            return exitCode;
        } else {
            mobiusJob.deleteTempFiles();
            return exit;
        }
    } else {
        return ToolRunner.run(conf, tool, args);
    }
}
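A hedged sketch of calling this runner from a driver class; MyMobiusJob is a hypothetical MobiusJob subclass, and only the run(Configuration, Tool, String[]) signature above is taken from the source:

import org.apache.hadoop.conf.Configuration;

public class MyDriver {
    // Hypothetical driver: MyMobiusJob is assumed to extend MobiusJob.
    public static void main(String[] args) throws Exception {
        int exit = MobiusJobRunner.run(new Configuration(), new MyMobiusJob(), args);
        System.exit(exit);
    }
}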
From source file:com.jbw.jobcontrol.Patent.java
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();

    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);
    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class,
            conf);
    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance(conf);

    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);

    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);

    // JobControl is a Runnable; run it on a background thread and poll until both jobs finish.
    Thread t = new Thread(jc);
    t.start();
    while (!jc.allFinished()) {
        Thread.sleep(1000);
    }
    jc.stop();
    return jc.getFailedJobList().isEmpty() ? 0 : 1;
}
From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java
License:Apache License
/**
 * Start the program.
 *
 * @param inputDir The path to the folder containing the raw listening data files.
 * @param outputDir The path for the results to be output to.
 * @throws IOException If an error occurs retrieving data from the file system or an error occurs running the job.
 */
public void start(Path inputDir, Path outputDir) throws IOException {
    FileSystem fs = FileSystem.get(this.conf);

    JobConf uniqueListenersConf = getUniqueListenersJobConf(inputDir);
    Path listenersOutputDir = FileOutputFormat.getOutputPath(uniqueListenersConf);
    Job listenersJob = new Job(uniqueListenersConf);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(uniqueListenersConf))) {
        fs.delete(FileOutputFormat.getOutputPath(uniqueListenersConf), true);
    }

    JobConf sumConf = getSumJobConf(inputDir);
    Path sumOutputDir = FileOutputFormat.getOutputPath(sumConf);
    Job sumJob = new Job(sumConf);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(sumConf))) {
        fs.delete(FileOutputFormat.getOutputPath(sumConf), true);
    }

    // the merge job depends on the other two jobs
    ArrayList<Job> mergeDependencies = new ArrayList<Job>();
    mergeDependencies.add(listenersJob);
    mergeDependencies.add(sumJob);
    JobConf mergeConf = getMergeConf(outputDir, sumOutputDir, listenersOutputDir);
    Job mergeJob = new Job(mergeConf, mergeDependencies);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(mergeConf))) {
        fs.delete(FileOutputFormat.getOutputPath(mergeConf), true);
    }

    // store the output paths of the intermediate jobs so they can be cleaned up after a successful run
    List<Path> deletePaths = new ArrayList<Path>();
    deletePaths.add(FileOutputFormat.getOutputPath(uniqueListenersConf));
    deletePaths.add(FileOutputFormat.getOutputPath(sumConf));

    JobControl control = new JobControl("TrackStatisticsProgram");
    control.addJob(listenersJob);
    control.addJob(sumJob);
    control.addJob(mergeJob);

    // execute the jobs
    try {
        Thread jobControlThread = new Thread(control, "jobcontrol");
        jobControlThread.start();
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        if (control.getFailedJobs().size() > 0) {
            throw new IOException("One or more jobs failed");
        }
    } catch (InterruptedException e) {
        throw new IOException("Interrupted while waiting for job control to finish", e);
    }

    // remove intermediate output paths
    for (Path deletePath : deletePaths) {
        fs.delete(deletePath, true);
    }
}
From source file:org.apache.pig.test.pigmix.mapreduce.L1.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L1.class);
    lp.setJobName("L1 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(IntWritable.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Group.class);
    lp.setReducerClass(Group.class);
    // forward all system properties into the job configuration
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L1out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L1 join");
    jc.addJob(group);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }
        // print a status snapshot on the first pass and every 10000 polls thereafter
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}
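The poll-and-print loop above recurs verbatim in the remaining pigmix examples below. A hedged sketch of factoring it into a helper; the printJobStates name is made up, while the getters are the same JobControl accessors used above:

// Hypothetical helper: print each job-state bucket tracked by a mapred JobControl.
private static void printJobStates(JobControl jc) {
    System.out.println("Running jobs");
    for (Job r : jc.getRunningJobs()) {
        System.out.println(r.getJobName());
    }
    System.out.println("Ready jobs");
    for (Job r : jc.getReadyJobs()) {
        System.out.println(r.getJobName());
    }
    System.out.println("Waiting jobs");
    for (Job r : jc.getWaitingJobs()) {
        System.out.println(r.getJobName());
    }
    System.out.println("Successful jobs");
    for (Job r : jc.getSuccessfulJobs()) {
        System.out.println(r.getJobName());
    }
}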
From source file:org.apache.pig.test.pigmix.mapreduce.L10.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L10.class);
    lp.setJobName("L10 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(MyType.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    lp.setPartitionerClass(MyPartitioner.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
    // Hardcode the parallelism to 40 since MyPartitioner assumes it.
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L10 join");
    jc.addJob(group);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}
From source file:org.apache.pig.test.pigmix.mapreduce.L11.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L11.class);
    lp.setJobName("L11 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(ReadPageViews.class);
    lp.setReducerClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/p"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L11.class);
    lu.setJobName("L11 Load Widerow");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadWideRow.class);
    lu.setCombinerClass(ReadWideRow.class);
    lu.setReducerClass(ReadWideRow.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/widerow"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/wr"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadWideRow = new Job(lu);

    JobConf join = new JobConf(L11.class);
    join.setJobName("L11 Union WideRow and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setCombinerClass(Union.class);
    join.setReducerClass(Union.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/p"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/wr"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L11out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadWideRow);

    JobControl jc = new JobControl("L11 join");
    jc.addJob(loadPages);
    jc.addJob(loadWideRow);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}
From source file:org.apache.pig.test.pigmix.mapreduce.L12.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L12.class);
    lp.setJobName("L12 Find Highest Value Page Per User");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(DoubleWritable.class);
    lp.setMapperClass(HighestValuePagePerUser.class);
    lp.setCombinerClass(HighestValuePagePerUser.class);
    lp.setReducerClass(HighestValuePagePerUser.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/highest_value_page_per_user"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L12.class);
    lu.setJobName("L12 Find Total Timespent per Term");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(LongWritable.class);
    lu.setMapperClass(TotalTimespentPerTerm.class);
    lu.setCombinerClass(TotalTimespentPerTerm.class);
    lu.setReducerClass(TotalTimespentPerTerm.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/total_timespent_per_term"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L12.class);
    join.setJobName("L12 Find Queries Per Action");
    join.setInputFormat(TextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(LongWritable.class);
    join.setMapperClass(QueriesPerAction.class);
    join.setCombinerClass(QueriesPerAction.class);
    join.setReducerClass(QueriesPerAction.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/queries_per_action"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);

    JobControl jc = new JobControl("L12 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}
From source file:org.apache.pig.test.pigmix.mapreduce.L13.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L13.class);
    lp.setJobName("L13 Load Left Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadLeftPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_left_pages"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L13.class);
    lu.setJobName("L13 Load Right Page Views");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadRightPageViews.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/power_users_samples"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_right_pages"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L13.class);
    join.setJobName("L13 Join Two Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_left_pages"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_right_pages"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L13out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L13 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}