List of usage examples for org.apache.hadoop.mapred.jobcontrol.Job#getDependingJobs
public ArrayList<Job> getDependingJobs()
From source file: com.ebay.erl.mobius.core.MobiusJob.java
License: Apache License
/** * Add a job, represented by the <code>aNewJob</code> object, into the execution queue. * <p>/*from w w w . j a v a2 s .com*/ * * Users can use this method to add one or more jobs' configuration into the job queue, and Mobius engine * will analyze the <code>aNewJob</code> objects within the queue to understand the dependence of jobs. * For example, if job B's input is from job A, then job B won't be submitted until A is completed * successfully. If A failed, the B will not be submitted. * <p> * * * @param aNewJobConf a {@link Configuration} object represents a Hadoop job. * @throws IOException */ protected void addToExecQueue(Configuration aNewJobConf) throws IOException { // Add the new job into execution engine and realize // its dependency, if any. // // To realize the job dependency, we need to analyze the input // path of this new job. // // The inputs of a job could be: // 1) if aNewJob is not a derived job (ex: result of another MR job), // then the inputs of the job can be retrieved from "mapred.input.dir", // or from {@link MultipleInputs} (ex, joining different type of dataset)/ // 2) if aNewJob is a derived job, the input is from the output of previous // MR job. String inputFolders = aNewJobConf.get("mapred.input.dir", ""); if (inputFolders.length() == 0) { // the value of "mapred.input.dir" is empty, assuming the inputs of this job // are coming from {@link MultipleInputs}. String multipleInputs = aNewJobConf .get("mapred.input.dir.mappers"/* for using old MultipleInputs, v0.20.X */, aNewJobConf.get( "mapreduce.input.multipleinputs.dir.formats"/* for new MultipleInputs, v0.23.X */, "")); if (multipleInputs.length() > 0) { // the input paths of this job is coming from MultipleInputs, extract the input paths. // The format from {@link MultipleInputs} is like: hadoop_path1;corresponding_mapper1,hadoop_path2;corresponding_mapper2... 
String[] pathAndMapperPairs = multipleInputs.split(","); for (String aPair : pathAndMapperPairs) { String[] pathToMapper = aPair.split(";"); String path = pathToMapper[0]; String mapper = pathToMapper[1]; if (inputFolders.length() == 0) { inputFolders = getPathOnly(path); } else { inputFolders = inputFolders + "," + getPathOnly(path); } } } else { throw new IllegalArgumentException("Cannot find input path(s) of job: [" + aNewJobConf.get("mapred.job.name") + "] from the following attributes: " + "mapred.input.dir, mapred.input.dir.mappers, nor mapreduce.input.multipleinputs.dir.formats. " + "Please specify the input path(s) of this job."); } } else { // the input path of this job is specified in mapred.input.dir inputFolders = getPathOnly(inputFolders); } //////////////////////////////////////////////////////////// // validate output path of this job, to ensure it doesn't // use the same folder of another job's output. //////////////////////////////////////////////////////////// String outputPath = aNewJobConf.get("mapred.output.dir", ""); if (outputPath.isEmpty()) throw new IllegalStateException( "Please specify the output directory of job:" + aNewJobConf.get("mapred.job.name")); if (this.isOutputOfAnotherJob(outputPath)) { throw new IllegalArgumentException("Job [" + aNewJobConf.get("mapred.job.name") + "]'s output [" + outputPath + "] is " + "the output of job[" + jobTopology.get(outputPath).getJobName() + "], " + "please make sure to use different output folder for each job."); } ////////////////////////////////////////////////////////////////// // pass all the validation, start to build the dependencies. 
////////////////////////////////////////////////////////////////// Job newJob = new ConfigurableJob(new JobConf(aNewJobConf, this.getClass())); newJob.setJobName(aNewJobConf.get("mapred.job.name", aNewJobConf.get("mapreduce.job.name", "Mobius Job"))); for (String anInputOfNewJob : inputFolders.split(",")) { // Added to track inputs for local PC sampling inputPaths.add(anInputOfNewJob); Job dependsOn = jobTopology.get(this.getFS().makeQualified(new Path(anInputOfNewJob)).toUri()); if (dependsOn != null) { List<Job> dependingJobs = newJob.getDependingJobs(); boolean alreadyInDependency = dependingJobs != null && dependingJobs.contains(dependsOn); if (alreadyInDependency) { // already added, do nothing. } else { LOGGER.info(newJob.getJobName() + " depends on " + dependsOn.getJobName()); newJob.addDependingJob(dependsOn); } } } // put the output of this <code>newJob</code> into job topology // so that later if a job read this <code>newJob</code>'s output // as its input, then the system can detect the dependency. URI outputPathURI = this.getFS().makeQualified(new Path(outputPath)).toUri(); LOGGER.info("Adding Job:" + newJob.getJobName() + "\tOutput:[" + outputPath.toString() + "]"); jobTopology.put(outputPathURI, newJob); }