Example usage for org.apache.hadoop.mapred.jobcontrol Job getDependingJobs

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.jobcontrol Job getDependingJobs.

Prototype

public ArrayList<Job> getDependingJobs() 
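
The method returns the jobs this job depends on, as registered through addDependingJob(), or null if no depending job has been added yet. Below is a minimal sketch of the call in isolation; the class name, job names, and empty configurations are made up for illustration:

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;

public class GetDependingJobsSketch {
    public static void main(String[] args) throws IOException {
        JobConf confA = new JobConf();
        confA.setJobName("job-a");
        Job jobA = new Job(confA);

        JobConf confB = new JobConf();
        confB.setJobName("job-b");
        Job jobB = new Job(confB);

        // job-b will not be submitted until job-a completes successfully.
        jobB.addDependingJob(jobA);

        ArrayList<Job> deps = jobB.getDependingJobs();
        System.out.println(deps.size()); // prints 1 (the list contains jobA)
    }
}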

Usage

From source file: com.ebay.erl.mobius.core.MobiusJob.java

License: Apache License

/**
 * Add a job, represented by the <code>aNewJob</code> object, into the execution queue.
 * <p>
 * 
 * Users can use this method to add one or more job configurations to the job queue; the Mobius engine
 * analyzes the <code>aNewJobConf</code> objects in the queue to work out the dependencies between jobs.
 * For example, if job B's input is the output of job A, then job B won't be submitted until A has
 * completed successfully.  If A fails, B will not be submitted.
 * <p>
 *  
 * 
 * @param aNewJobConf a {@link Configuration} object represents a Hadoop job. 
 * @throws IOException
 */
protected void addToExecQueue(Configuration aNewJobConf) throws IOException {
    // Add the new job into execution engine and realize
    // its dependency, if any.
    //
    // To realize the job dependency, we need to analyze the input
    // path of this new job.
    // 
    // The inputs of a job can come from two places:
    // 1) if the new job is not a derived job (i.e., not the result of another
    // MR job), its inputs can be retrieved from "mapred.input.dir", or from
    // {@link MultipleInputs} (e.g., when joining different types of datasets);
    // 2) if the new job is a derived job, its input is the output of a
    // previous MR job.

    String inputFolders = aNewJobConf.get("mapred.input.dir", "");
    if (inputFolders.length() == 0) {
        // the value of "mapred.input.dir" is empty, assuming the inputs of this job 
        // are coming from {@link MultipleInputs}.

        String multipleInputs = aNewJobConf
                .get("mapred.input.dir.mappers"/* for using old MultipleInputs, v0.20.X */, aNewJobConf.get(
                        "mapreduce.input.multipleinputs.dir.formats"/* for new MultipleInputs, v0.23.X */, ""));

        if (multipleInputs.length() > 0) {
            // the input paths of this job are coming from MultipleInputs; extract the input paths.
            // The format from {@link MultipleInputs} is like: hadoop_path1;corresponding_mapper1,hadoop_path2;corresponding_mapper2...
            String[] pathAndMapperPairs = multipleInputs.split(",");
            for (String aPair : pathAndMapperPairs) {
                String[] pathToMapper = aPair.split(";");
                String path = pathToMapper[0];
                String mapper = pathToMapper[1]; // mapper class name; not needed for dependency analysis

                if (inputFolders.length() == 0) {
                    inputFolders = getPathOnly(path);
                } else {
                    inputFolders = inputFolders + "," + getPathOnly(path);
                }
            }
        } else {
            throw new IllegalArgumentException("Cannot find input path(s) of job: ["
                    + aNewJobConf.get("mapred.job.name") + "] from the following attributes: "
                    + "mapred.input.dir, mapred.input.dir.mappers, nor mapreduce.input.multipleinputs.dir.formats. "
                    + "Please specify the input path(s) of this job.");
        }
    } else {
        // the input path of this job is specified in mapred.input.dir
        inputFolders = getPathOnly(inputFolders);
    }

    ////////////////////////////////////////////////////////////
    // validate output path of this job, to ensure it doesn't
    // use the same folder of another job's output.
    ////////////////////////////////////////////////////////////
    String outputPath = aNewJobConf.get("mapred.output.dir", "");
    if (outputPath.isEmpty())
        throw new IllegalStateException(
                "Please specify the output directory of job: " + aNewJobConf.get("mapred.job.name"));

    if (this.isOutputOfAnotherJob(outputPath)) {
        throw new IllegalArgumentException("Job [" + aNewJobConf.get("mapred.job.name") + "]'s output ["
                + outputPath + "] is " + "the output of job[" + jobTopology.get(outputPath).getJobName() + "], "
                + "please make sure to use different output folder for each job.");
    }

    //////////////////////////////////////////////////////////////////
    // pass all the validation, start to build the dependencies.
    //////////////////////////////////////////////////////////////////
    Job newJob = new ConfigurableJob(new JobConf(aNewJobConf, this.getClass()));

    newJob.setJobName(aNewJobConf.get("mapred.job.name", aNewJobConf.get("mapreduce.job.name", "Mobius Job")));
    for (String anInputOfNewJob : inputFolders.split(",")) {
        // Added to track inputs for local PC sampling
        inputPaths.add(anInputOfNewJob);

        Job dependsOn = jobTopology.get(this.getFS().makeQualified(new Path(anInputOfNewJob)).toUri());
        if (dependsOn != null) {
            List<Job> dependingJobs = newJob.getDependingJobs();

            // skip if dependsOn is already registered as a dependency
            boolean alreadyInDependency = dependingJobs != null && dependingJobs.contains(dependsOn);
            if (!alreadyInDependency) {
                LOGGER.info(newJob.getJobName() + " depends on " + dependsOn.getJobName());
                newJob.addDependingJob(dependsOn);
            }
        }
    }

    // Put the output of this <code>newJob</code> into the job topology,
    // so that if a later job reads this <code>newJob</code>'s output
    // as its input, the system can detect the dependency.

    URI outputPathURI = this.getFS().makeQualified(new Path(outputPath)).toUri();
    LOGGER.info("Adding Job:" + newJob.getJobName() + "\tOutput:[" + outputPath.toString() + "]");
    jobTopology.put(outputPathURI, newJob);
}
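
The method above only records dependencies; it does not run anything. Below is a hedged sketch of how the jobs collected in jobTopology might then be executed with org.apache.hadoop.mapred.jobcontrol.JobControl; the method name runQueuedJobs, the group name, and the polling interval are made up, and the real Mobius submission logic may differ:

import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

private void runQueuedJobs() throws InterruptedException {
    JobControl control = new JobControl("mobius-jobs");

    // Every job in the topology must be registered explicitly; JobControl
    // uses the links recorded via addDependingJob to decide submission order.
    for (Job job : jobTopology.values()) {
        control.addJob(job);
    }

    // JobControl implements Runnable; run it on its own thread and poll.
    Thread runner = new Thread(control);
    runner.setDaemon(true);
    runner.start();
    while (!control.allFinished()) {
        Thread.sleep(500);
    }
    control.stop();
}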