List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
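The signature above takes the requested number of reduce tasks for a job. Every example below calls it while assembling a JobConf before submission, passing either a fixed count, a user-supplied parallelism, or 0 to make the job map-only. As a point of reference, here is a minimal, self-contained sketch of a typical call site using the classic mapred API; the driver class SetNumReduceTasksExample is made up for this sketch and is not taken from any of the projects quoted below.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    import org.apache.hadoop.mapred.lib.IdentityMapper;
    import org.apache.hadoop.mapred.lib.IdentityReducer;

    public class SetNumReduceTasksExample {
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf(SetNumReduceTasksExample.class);
            conf.setJobName("setNumReduceTasks example");

            // Identity mapper/reducer from the stock mapred library keep the sketch self-contained.
            conf.setMapperClass(IdentityMapper.class);
            conf.setReducerClass(IdentityReducer.class);
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);
            // TextInputFormat produces LongWritable offsets and Text lines, which the identity
            // classes pass through unchanged.
            conf.setOutputKeyClass(LongWritable.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path(args[0]));
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));

            // Request four reduce tasks; passing 0 instead would make this a map-only job whose
            // mapper output is written straight to the output directory.
            conf.setNumReduceTasks(4);

            JobClient.runJob(conf);
        }
    }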
From source file: org.apache.oozie.action.hadoop.LauncherMainTester.java
License: Apache License
private static JobConf createSleepMapperReducerJobConf() {
    JobConf jConf = new JobConf(true);
    jConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    jConf.setMapperClass(SleepMapperReducerForTest.class);
    jConf.setReducerClass(SleepMapperReducerForTest.class);
    jConf.setOutputKeyClass(Text.class);
    jConf.setOutputValueClass(IntWritable.class);
    jConf.setInputFormat(TextInputFormat.class);
    jConf.setOutputFormat(TextOutputFormat.class);
    jConf.setNumReduceTasks(1);
    jConf.set(SleepMapperReducerForTest.SLEEP_TIME_MILLIS_KEY, "60000");
    return jConf;
}
From source file: org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License: Apache License
public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML) throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}
From source file: org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java
License: Apache License
/**
 * Submit a Pig job to hadoop.
 *
 * @param pom
 *            the map-reduce operator describing the map functions, grouping functions,
 *            reduce function, parallelism, inputs and output for this job.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;
    List<String> funcs = new ArrayList<String>();

    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");

        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));

        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that combiner is only called once
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }

        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15
        conf.setInputKeyClass(Text.class);
        // not used starting with 0.15
        conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));

        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }

        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else {
                // still running
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }

        // bug 1030028: if the input file is empty; hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }

        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))
                    .asElement(outputFile.toString());
            if (success && !descriptor.exists()) {
                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;
            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}
From source file: org.apache.pig.test.pigmix.mapreduce.L1.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L1.class);
    lp.setJobName("L1 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(IntWritable.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Group.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L1out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L1 join");
    jc.addJob(group);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L10.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L10.class);
    lp.setJobName("L10 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(MyType.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    lp.setPartitionerClass(MyPartitioner.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
    // Hardcode the parallel to 40 since MyPartitioner assumes it
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L10 join");
    jc.addJob(group);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L11.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L11.class);
    lp.setJobName("L11 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(ReadPageViews.class);
    lp.setReducerClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/p"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L11.class);
    lu.setJobName("L11 Load Widerow");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadWideRow.class);
    lu.setCombinerClass(ReadWideRow.class);
    lu.setReducerClass(ReadWideRow.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/widerow"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/wr"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadWideRow = new Job(lu);

    JobConf join = new JobConf(L11.class);
    join.setJobName("L11 Union WideRow and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setCombinerClass(Union.class);
    join.setReducerClass(Union.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/p"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/wr"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L11out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadWideRow);

    JobControl jc = new JobControl("L11 join");
    jc.addJob(loadPages);
    jc.addJob(loadWideRow);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L12.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L12.class);
    lp.setJobName("L12 Find Highest Value Page Per User");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(DoubleWritable.class);
    lp.setMapperClass(HighestValuePagePerUser.class);
    lp.setCombinerClass(HighestValuePagePerUser.class);
    lp.setReducerClass(HighestValuePagePerUser.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/highest_value_page_per_user"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L12.class);
    lu.setJobName("L12 Find Total Timespent per Term");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(LongWritable.class);
    lu.setMapperClass(TotalTimespentPerTerm.class);
    lu.setCombinerClass(TotalTimespentPerTerm.class);
    lu.setReducerClass(TotalTimespentPerTerm.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/total_timespent_per_term"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L12.class);
    join.setJobName("L12 Find Queries Per Action");
    join.setInputFormat(TextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(LongWritable.class);
    join.setMapperClass(QueriesPerAction.class);
    join.setCombinerClass(QueriesPerAction.class);
    join.setReducerClass(QueriesPerAction.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/queries_per_action"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);

    JobControl jc = new JobControl("L12 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L13.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L13.class);
    lp.setJobName("L13 Load Left Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadLeftPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_left_pages"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L13.class);
    lu.setJobName("L13 Load Right Page Views");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadRightPageViews.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/power_users_samples"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_right_pages"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L13.class);
    join.setJobName("L13 Join Two Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_left_pages"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_right_pages"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L13out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L13 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L14.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");

    JobConf lp = new JobConf(L14.class);
    lp.setJobName("L14 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views_sorted"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_pages_14"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L14.class);
    lu.setJobName("L14 Load Users");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadUsers.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/users_sorted"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_users_14"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L14.class);
    join.setJobName("L14 Join Users and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_pages_14"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_users_14"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L14out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L14 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}
From source file: org.apache.pig.test.pigmix.mapreduce.L15.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];

    JobConf lp = new JobConf(L15.class);
    lp.setJobName("L15 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Combiner.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L15out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L15 join");
    jc.addJob(group);
    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) System.err.println(failure.getMessage());
            break;
        }
        try { Thread.sleep(5000); } catch (InterruptedException e) { }
        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) System.out.println(r.getJobName());
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) System.out.println(r.getJobName());
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) System.out.println(r.getJobName());
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) System.out.println(r.getJobName());
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) System.err.println(failure.getMessage());
    }
    jc.stop();
}