List of usage examples for the method org.apache.hadoop.mapred.jobcontrol.Job.getJobName()
public String getJobName()
From source file:com.ebay.erl.mobius.core.MobiusJob.java
License:Apache License
/** * Add a job, represented by the <code>aNewJob</code> object, into the execution queue. * <p>// w w w.ja v a 2s . c o m * * Users can use this method to add one or more jobs' configuration into the job queue, and Mobius engine * will analyze the <code>aNewJob</code> objects within the queue to understand the dependence of jobs. * For example, if job B's input is from job A, then job B won't be submitted until A is completed * successfully. If A failed, the B will not be submitted. * <p> * * * @param aNewJobConf a {@link Configuration} object represents a Hadoop job. * @throws IOException */ protected void addToExecQueue(Configuration aNewJobConf) throws IOException { // Add the new job into execution engine and realize // its dependency, if any. // // To realize the job dependency, we need to analyze the input // path of this new job. // // The inputs of a job could be: // 1) if aNewJob is not a derived job (ex: result of another MR job), // then the inputs of the job can be retrieved from "mapred.input.dir", // or from {@link MultipleInputs} (ex, joining different type of dataset)/ // 2) if aNewJob is a derived job, the input is from the output of previous // MR job. String inputFolders = aNewJobConf.get("mapred.input.dir", ""); if (inputFolders.length() == 0) { // the value of "mapred.input.dir" is empty, assuming the inputs of this job // are coming from {@link MultipleInputs}. String multipleInputs = aNewJobConf .get("mapred.input.dir.mappers"/* for using old MultipleInputs, v0.20.X */, aNewJobConf.get( "mapreduce.input.multipleinputs.dir.formats"/* for new MultipleInputs, v0.23.X */, "")); if (multipleInputs.length() > 0) { // the input paths of this job is coming from MultipleInputs, extract the input paths. // The format from {@link MultipleInputs} is like: hadoop_path1;corresponding_mapper1,hadoop_path2;corresponding_mapper2... 
String[] pathAndMapperPairs = multipleInputs.split(","); for (String aPair : pathAndMapperPairs) { String[] pathToMapper = aPair.split(";"); String path = pathToMapper[0]; String mapper = pathToMapper[1]; if (inputFolders.length() == 0) { inputFolders = getPathOnly(path); } else { inputFolders = inputFolders + "," + getPathOnly(path); } } } else { throw new IllegalArgumentException("Cannot find input path(s) of job: [" + aNewJobConf.get("mapred.job.name") + "] from the following attributes: " + "mapred.input.dir, mapred.input.dir.mappers, nor mapreduce.input.multipleinputs.dir.formats. " + "Please specify the input path(s) of this job."); } } else { // the input path of this job is specified in mapred.input.dir inputFolders = getPathOnly(inputFolders); } //////////////////////////////////////////////////////////// // validate output path of this job, to ensure it doesn't // use the same folder of another job's output. //////////////////////////////////////////////////////////// String outputPath = aNewJobConf.get("mapred.output.dir", ""); if (outputPath.isEmpty()) throw new IllegalStateException( "Please specify the output directory of job:" + aNewJobConf.get("mapred.job.name")); if (this.isOutputOfAnotherJob(outputPath)) { throw new IllegalArgumentException("Job [" + aNewJobConf.get("mapred.job.name") + "]'s output [" + outputPath + "] is " + "the output of job[" + jobTopology.get(outputPath).getJobName() + "], " + "please make sure to use different output folder for each job."); } ////////////////////////////////////////////////////////////////// // pass all the validation, start to build the dependencies. 
////////////////////////////////////////////////////////////////// Job newJob = new ConfigurableJob(new JobConf(aNewJobConf, this.getClass())); newJob.setJobName(aNewJobConf.get("mapred.job.name", aNewJobConf.get("mapreduce.job.name", "Mobius Job"))); for (String anInputOfNewJob : inputFolders.split(",")) { // Added to track inputs for local PC sampling inputPaths.add(anInputOfNewJob); Job dependsOn = jobTopology.get(this.getFS().makeQualified(new Path(anInputOfNewJob)).toUri()); if (dependsOn != null) { List<Job> dependingJobs = newJob.getDependingJobs(); boolean alreadyInDependency = dependingJobs != null && dependingJobs.contains(dependsOn); if (alreadyInDependency) { // already added, do nothing. } else { LOGGER.info(newJob.getJobName() + " depends on " + dependsOn.getJobName()); newJob.addDependingJob(dependsOn); } } } // put the output of this <code>newJob</code> into job topology // so that later if a job read this <code>newJob</code>'s output // as its input, then the system can detect the dependency. URI outputPathURI = this.getFS().makeQualified(new Path(outputPath)).toUri(); LOGGER.info("Adding Job:" + newJob.getJobName() + "\tOutput:[" + outputPath.toString() + "]"); jobTopology.put(outputPathURI, newJob); }
From source file:com.ebay.erl.mobius.core.MobiusJobRunner.java
License:Apache License
private static String jobToString(Job aJob) { StringBuffer sb = new StringBuffer(); sb.append("job mapred id:\t") .append(aJob.getAssignedJobID() == null ? "unassigned" : aJob.getAssignedJobID().toString()) .append("\t"); sb.append("job name: ").append(aJob.getJobName()).append("\n"); String state = "Unset"; switch (aJob.getState()) { case Job.DEPENDENT_FAILED: state = "DEPENDENT_FAILED"; break;/*ww w. j av a2 s . c o m*/ case Job.FAILED: state = "FAILED"; break; case Job.READY: state = "READY"; break; case Job.RUNNING: state = "RUNNING"; break; case Job.SUCCESS: state = "SUCCESS"; break; case Job.WAITING: state = "WAITING"; break; } sb.append("job state:\t").append(state).append("\n"); sb.append("job id:\t").append(aJob.getJobID()).append("\n"); sb.append("job message:\t").append(aJob.getMessage()).append("\n"); // comment out on March 30, 2012. As NPE is thrown on Apollo. // // if ( aJob.getDependingJobs () == null || aJob.getDependingJobs ().size () == 0 ) // { // sb.append ("job has no depending job:\t").append ("\n"); // } else // { // sb.append ("job has ").append (aJob.getDependingJobs ().size ()).append (" dependeng jobs:\n"); // for ( int i = 0; i < aJob.getDependingJobs ().size (); i++ ) // { // sb.append ("\t depending job ").append (i).append (":\t"); // sb.append ((aJob.getDependingJobs ().get (i)).getJobName ()).append ("\n"); // } // } return sb.toString().trim(); }
From source file:org.apache.pig.test.pigmix.mapreduce.L1.java
License:Apache License
/**
 * Configures the "L1 Load Page Views" MapReduce job, runs it under a {@link JobControl}
 * on a background thread, and polls until the job control reports that everything finished
 * or a job failed.
 *
 * <p>Every JVM system property is copied into the job configuration. Failure messages are
 * written to standard error once, after polling stops.
 *
 * @param args exactly three values: {@code inputDir outputDir parallel}, where
 *        {@code parallel} is the number of reduce tasks. Any other argument count prints
 *        a usage line and exits with status 1.
 * @throws IOException declared by the Hadoop job set-up calls.
 * @throws NumberFormatException if {@code parallel} is not an integer.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L1.class);
    lp.setJobName("L1 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(IntWritable.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Group.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L1out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L1 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        // Stop polling as soon as anything failed; the messages are reported after the loop.
        ArrayList<Job> failed = jc.getFailedJobs();
        if (failed != null && !failed.isEmpty()) {
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop polling; swallowing it would hide the
            // request, and re-looping with the flag set would make every sleep fail at once.
            Thread.currentThread().interrupt();
            break;
        }

        // With a 5 second sleep this dumps the status on the first pass and then only on
        // every 10000th poll.
        if (i % 10000 == 0) {
            printJobNames("Running jobs", jc.getRunningJobs());
            printJobNames("Ready jobs", jc.getReadyJobs());
            printJobNames("Waiting jobs", jc.getWaitingJobs());
            printJobNames("Successful jobs", jc.getSuccessfulJobs());
        }
        i++;
    }

    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

/** Prints {@code heading}, then the name of each job in {@code jobs} (nothing more if it is null or empty). */
private static void printJobNames(String heading, ArrayList<Job> jobs) {
    System.out.println(heading);
    if (jobs == null) {
        return;
    }
    for (Job job : jobs) {
        System.out.println(job.getJobName());
    }
}
From source file:org.apache.pig.test.pigmix.mapreduce.L10.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 3) { System.out.println("Parameters: inputDir outputDir parallel"); System.exit(1);/*from w ww. j ava2 s.c o m*/ } String inputDir = args[0]; String outputDir = args[1]; String parallel = args[2]; JobConf lp = new JobConf(L10.class); lp.setJobName("L10 Load Page Views"); lp.setInputFormat(TextInputFormat.class); lp.setOutputKeyClass(MyType.class); lp.setOutputValueClass(Text.class); lp.setMapperClass(ReadPageViews.class); lp.setReducerClass(Group.class); lp.setPartitionerClass(MyPartitioner.class); Properties props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lp.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views")); FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out")); // Hardcode the parallel to 40 since MyPartitioner assumes it lp.setNumReduceTasks(40); Job group = new Job(lp); JobControl jc = new JobControl("L10 join"); jc.addJob(group); new Thread(jc).start(); int i = 0; while (!jc.allFinished()) { ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } break; } try { Thread.sleep(5000); } catch (InterruptedException e) { } if (i % 10000 == 0) { System.out.println("Running jobs"); ArrayList<Job> running = jc.getRunningJobs(); if (running != null && running.size() > 0) { for (Job r : running) { System.out.println(r.getJobName()); } } System.out.println("Ready jobs"); ArrayList<Job> ready = jc.getReadyJobs(); if (ready != null && ready.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Waiting jobs"); ArrayList<Job> waiting = jc.getWaitingJobs(); if (waiting != null && waiting.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Successful jobs"); ArrayList<Job> 
success = jc.getSuccessfulJobs(); if (success != null && success.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } } i++; } ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } } jc.stop(); }
From source file:org.apache.pig.test.pigmix.mapreduce.L11.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 3) { System.out.println("Parameters: inputDir outputDir parallel"); System.exit(1);// w ww.j a v a2 s. c o m } String inputDir = args[0]; String outputDir = args[1]; String parallel = args[2]; String user = System.getProperty("user.name"); JobConf lp = new JobConf(L11.class); lp.setJobName("L11 Load Page Views"); lp.setInputFormat(TextInputFormat.class); lp.setOutputKeyClass(Text.class); lp.setOutputValueClass(Text.class); lp.setMapperClass(ReadPageViews.class); lp.setCombinerClass(ReadPageViews.class); lp.setReducerClass(ReadPageViews.class); Properties props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lp.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views")); FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/p")); lp.setNumReduceTasks(Integer.parseInt(parallel)); Job loadPages = new Job(lp); JobConf lu = new JobConf(L11.class); lu.setJobName("L11 Load Widerow"); lu.setInputFormat(TextInputFormat.class); lu.setOutputKeyClass(Text.class); lu.setOutputValueClass(Text.class); lu.setMapperClass(ReadWideRow.class); lu.setCombinerClass(ReadWideRow.class); lu.setReducerClass(ReadWideRow.class); props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lu.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lu, new Path(inputDir + "/widerow")); FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/wr")); lu.setNumReduceTasks(Integer.parseInt(parallel)); Job loadWideRow = new Job(lu); JobConf join = new JobConf(L11.class); join.setJobName("L11 Union WideRow and Pages"); join.setInputFormat(KeyValueTextInputFormat.class); join.setOutputKeyClass(Text.class); join.setOutputValueClass(Text.class); join.setMapperClass(IdentityMapper.class); join.setCombinerClass(Union.class); join.setReducerClass(Union.class); 
props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { join.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(join, new Path(outputDir + "/p")); FileInputFormat.addInputPath(join, new Path(outputDir + "/wr")); FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L11out")); join.setNumReduceTasks(Integer.parseInt(parallel)); Job joinJob = new Job(join); joinJob.addDependingJob(loadPages); joinJob.addDependingJob(loadWideRow); JobControl jc = new JobControl("L11 join"); jc.addJob(loadPages); jc.addJob(loadWideRow); jc.addJob(joinJob); new Thread(jc).start(); int i = 0; while (!jc.allFinished()) { ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } break; } try { Thread.sleep(5000); } catch (InterruptedException e) { } if (i % 10000 == 0) { System.out.println("Running jobs"); ArrayList<Job> running = jc.getRunningJobs(); if (running != null && running.size() > 0) { for (Job r : running) { System.out.println(r.getJobName()); } } System.out.println("Ready jobs"); ArrayList<Job> ready = jc.getReadyJobs(); if (ready != null && ready.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Waiting jobs"); ArrayList<Job> waiting = jc.getWaitingJobs(); if (waiting != null && waiting.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Successful jobs"); ArrayList<Job> success = jc.getSuccessfulJobs(); if (success != null && success.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } } i++; } ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } } jc.stop(); }
From source file:org.apache.pig.test.pigmix.mapreduce.L12.java
License:Apache License
/**
 * Configures three independent MapReduce jobs over the page views input
 * ("L12 Find Highest Value Page Per User", "L12 Find Total Timespent per Term" and
 * "L12 Find Queries Per Action"), runs them under a {@link JobControl} on a background
 * thread, and polls until the job control reports that everything finished or a job failed.
 *
 * <p>Every JVM system property is copied into each job configuration. Failure messages
 * are written to standard error once, after polling stops.
 *
 * @param args exactly three values: {@code inputDir outputDir parallel}, where
 *        {@code parallel} is the number of reduce tasks. Any other argument count prints
 *        a usage line and exits with status 1.
 * @throws IOException declared by the Hadoop job set-up calls.
 * @throws NumberFormatException if {@code parallel} is not an integer.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    // Job 1: highest value page per user.
    JobConf lp = new JobConf(L12.class);
    lp.setJobName("L12 Find Highest Value Page Per User");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(DoubleWritable.class);
    lp.setMapperClass(HighestValuePagePerUser.class);
    lp.setCombinerClass(HighestValuePagePerUser.class);
    lp.setReducerClass(HighestValuePagePerUser.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/highest_value_page_per_user"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    // Job 2: total time spent per term.
    JobConf lu = new JobConf(L12.class);
    lu.setJobName("L12 Find Total Timespent per Term");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(LongWritable.class);
    lu.setMapperClass(TotalTimespentPerTerm.class);
    lu.setCombinerClass(TotalTimespentPerTerm.class);
    lu.setReducerClass(TotalTimespentPerTerm.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/total_timespent_per_term"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadUsers = new Job(lu);

    // Job 3: queries per action.
    JobConf join = new JobConf(L12.class);
    join.setJobName("L12 Find Queries Per Action");
    join.setInputFormat(TextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(LongWritable.class);
    join.setMapperClass(QueriesPerAction.class);
    join.setCombinerClass(QueriesPerAction.class);
    join.setReducerClass(QueriesPerAction.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/queries_per_action"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);

    JobControl jc = new JobControl("L12 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        // Stop polling as soon as anything failed; the messages are reported after the loop.
        ArrayList<Job> failed = jc.getFailedJobs();
        if (failed != null && !failed.isEmpty()) {
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop polling; swallowing it would hide the
            // request, and re-looping with the flag set would make every sleep fail at once.
            Thread.currentThread().interrupt();
            break;
        }

        // With a 5 second sleep this dumps the status on the first pass and then only on
        // every 10000th poll.
        if (i % 10000 == 0) {
            printJobNames("Running jobs", jc.getRunningJobs());
            printJobNames("Ready jobs", jc.getReadyJobs());
            printJobNames("Waiting jobs", jc.getWaitingJobs());
            printJobNames("Successful jobs", jc.getSuccessfulJobs());
        }
        i++;
    }

    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

/** Prints {@code heading}, then the name of each job in {@code jobs} (nothing more if it is null or empty). */
private static void printJobNames(String heading, ArrayList<Job> jobs) {
    System.out.println(heading);
    if (jobs == null) {
        return;
    }
    for (Job job : jobs) {
        System.out.println(job.getJobName());
    }
}
From source file:org.apache.pig.test.pigmix.mapreduce.L13.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 3) { System.out.println("Parameters: inputDir outputDir parallel"); System.exit(1);// www . jav a2s . c o m } String inputDir = args[0]; String outputDir = args[1]; String parallel = args[2]; String user = System.getProperty("user.name"); JobConf lp = new JobConf(L13.class); lp.setJobName("L13 Load Left Page Views"); lp.setInputFormat(TextInputFormat.class); lp.setOutputKeyClass(Text.class); lp.setOutputValueClass(Text.class); lp.setMapperClass(ReadLeftPageViews.class); Properties props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lp.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views")); FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_left_pages")); lp.setNumReduceTasks(0); Job loadPages = new Job(lp); JobConf lu = new JobConf(L13.class); lu.setJobName("L13 Load Right Page Views"); lu.setInputFormat(TextInputFormat.class); lu.setOutputKeyClass(Text.class); lu.setOutputValueClass(Text.class); lu.setMapperClass(ReadRightPageViews.class); props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lu.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lu, new Path(inputDir + "/power_users_samples")); FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_right_pages")); lu.setNumReduceTasks(0); Job loadUsers = new Job(lu); JobConf join = new JobConf(L13.class); join.setJobName("L13 Join Two Pages"); join.setInputFormat(KeyValueTextInputFormat.class); join.setOutputKeyClass(Text.class); join.setOutputValueClass(Text.class); join.setMapperClass(IdentityMapper.class); join.setReducerClass(Join.class); props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { join.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(join, 
new Path(outputDir + "/indexed_left_pages")); FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_right_pages")); FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L13out")); join.setNumReduceTasks(Integer.parseInt(parallel)); Job joinJob = new Job(join); joinJob.addDependingJob(loadPages); joinJob.addDependingJob(loadUsers); JobControl jc = new JobControl("L13 join"); jc.addJob(loadPages); jc.addJob(loadUsers); jc.addJob(joinJob); new Thread(jc).start(); int i = 0; while (!jc.allFinished()) { ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } break; } try { Thread.sleep(5000); } catch (InterruptedException e) { } if (i % 10000 == 0) { System.out.println("Running jobs"); ArrayList<Job> running = jc.getRunningJobs(); if (running != null && running.size() > 0) { for (Job r : running) { System.out.println(r.getJobName()); } } System.out.println("Ready jobs"); ArrayList<Job> ready = jc.getReadyJobs(); if (ready != null && ready.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Waiting jobs"); ArrayList<Job> waiting = jc.getWaitingJobs(); if (waiting != null && waiting.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Successful jobs"); ArrayList<Job> success = jc.getSuccessfulJobs(); if (success != null && success.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } } i++; } ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } } jc.stop(); }
From source file:org.apache.pig.test.pigmix.mapreduce.L14.java
License:Apache License
/**
 * Configures three MapReduce jobs ("L14 Load Page Views", "L14 Load Users" and
 * "L14 Join Users and Pages"), makes the join job depend on the two map-only load jobs,
 * runs them under a {@link JobControl} on a background thread, and polls until the job
 * control reports that everything finished or a job failed.
 *
 * <p>Every JVM system property is copied into each job configuration. Failure messages
 * are written to standard error once, after polling stops.
 *
 * @param args exactly three values: {@code inputDir outputDir parallel}, where
 *        {@code parallel} is the number of reduce tasks of the join job. Any other
 *        argument count prints a usage line and exits with status 1.
 * @throws IOException declared by the Hadoop job set-up calls.
 * @throws NumberFormatException if {@code parallel} is not an integer.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    // Job 1 (map-only): load the sorted page views.
    JobConf lp = new JobConf(L14.class);
    lp.setJobName("L14 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views_sorted"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_pages_14"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    // Job 2 (map-only): load the sorted users.
    JobConf lu = new JobConf(L14.class);
    lu.setJobName("L14 Load Users");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadUsers.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/users_sorted"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_users_14"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    // Job 3: join the outputs of jobs 1 and 2.
    JobConf join = new JobConf(L14.class);
    join.setJobName("L14 Join Users and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_pages_14"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_users_14"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L14out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L14 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        // Stop polling as soon as anything failed; the messages are reported after the loop.
        ArrayList<Job> failed = jc.getFailedJobs();
        if (failed != null && !failed.isEmpty()) {
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop polling; swallowing it would hide the
            // request, and re-looping with the flag set would make every sleep fail at once.
            Thread.currentThread().interrupt();
            break;
        }

        // With a 5 second sleep this dumps the status on the first pass and then only on
        // every 10000th poll.
        if (i % 10000 == 0) {
            printJobNames("Running jobs", jc.getRunningJobs());
            printJobNames("Ready jobs", jc.getReadyJobs());
            printJobNames("Waiting jobs", jc.getWaitingJobs());
            printJobNames("Successful jobs", jc.getSuccessfulJobs());
        }
        i++;
    }

    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

/** Prints {@code heading}, then the name of each job in {@code jobs} (nothing more if it is null or empty). */
private static void printJobNames(String heading, ArrayList<Job> jobs) {
    System.out.println(heading);
    if (jobs == null) {
        return;
    }
    for (Job job : jobs) {
        System.out.println(job.getJobName());
    }
}
From source file:org.apache.pig.test.pigmix.mapreduce.L15.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 3) { System.out.println("Parameters: inputDir outputDir parallel"); System.exit(1);// w w w . ja v a2s . c o m } String inputDir = args[0]; String outputDir = args[1]; String parallel = args[2]; JobConf lp = new JobConf(L15.class); lp.setJobName("L15 Load Page Views"); lp.setInputFormat(TextInputFormat.class); lp.setOutputKeyClass(Text.class); lp.setOutputValueClass(Text.class); lp.setMapperClass(ReadPageViews.class); lp.setCombinerClass(Combiner.class); lp.setReducerClass(Group.class); Properties props = System.getProperties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { lp.set((String) entry.getKey(), (String) entry.getValue()); } FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views")); FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L15out")); lp.setNumReduceTasks(Integer.parseInt(parallel)); Job group = new Job(lp); JobControl jc = new JobControl("L15 join"); jc.addJob(group); new Thread(jc).start(); int i = 0; while (!jc.allFinished()) { ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } break; } try { Thread.sleep(5000); } catch (InterruptedException e) { } if (i % 10000 == 0) { System.out.println("Running jobs"); ArrayList<Job> running = jc.getRunningJobs(); if (running != null && running.size() > 0) { for (Job r : running) { System.out.println(r.getJobName()); } } System.out.println("Ready jobs"); ArrayList<Job> ready = jc.getReadyJobs(); if (ready != null && ready.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Waiting jobs"); ArrayList<Job> waiting = jc.getWaitingJobs(); if (waiting != null && waiting.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } System.out.println("Successful jobs"); ArrayList<Job> success = jc.getSuccessfulJobs(); if (success != 
null && success.size() > 0) { for (Job r : ready) { System.out.println(r.getJobName()); } } } i++; } ArrayList<Job> failures = jc.getFailedJobs(); if (failures != null && failures.size() > 0) { for (Job failure : failures) { System.err.println(failure.getMessage()); } } jc.stop(); }
From source file:org.apache.pig.test.pigmix.mapreduce.L16.java
License:Apache License
/**
 * Configures the "L16 Load Page Views" MapReduce job, runs it under a {@link JobControl}
 * on a background thread, and polls until the job control reports that everything finished
 * or a job failed.
 *
 * <p>Every JVM system property is copied into the job configuration. Failure messages are
 * written to standard error once, after polling stops.
 *
 * @param args exactly three values: {@code inputDir outputDir parallel}, where
 *        {@code parallel} is the number of reduce tasks. Any other argument count prints
 *        a usage line and exits with status 1.
 * @throws IOException declared by the Hadoop job set-up calls.
 * @throws NumberFormatException if {@code parallel} is not an integer.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L16.class);
    lp.setJobName("L16 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L16out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L16 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        // Stop polling as soon as anything failed; the messages are reported after the loop.
        ArrayList<Job> failed = jc.getFailedJobs();
        if (failed != null && !failed.isEmpty()) {
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop polling; swallowing it would hide the
            // request, and re-looping with the flag set would make every sleep fail at once.
            Thread.currentThread().interrupt();
            break;
        }

        // With a 5 second sleep this dumps the status on the first pass and then only on
        // every 10000th poll.
        if (i % 10000 == 0) {
            printJobNames("Running jobs", jc.getRunningJobs());
            printJobNames("Ready jobs", jc.getReadyJobs());
            printJobNames("Waiting jobs", jc.getWaitingJobs());
            printJobNames("Successful jobs", jc.getSuccessfulJobs());
        }
        i++;
    }

    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

/** Prints {@code heading}, then the name of each job in {@code jobs} (nothing more if it is null or empty). */
private static void printJobNames(String heading, ArrayList<Job> jobs) {
    System.out.println(heading);
    if (jobs == null) {
        return;
    }
    for (Job job : jobs) {
        System.out.println(job.getJobName());
    }
}