Example usage for org.apache.hadoop.mapred.jobcontrol JobControl JobControl

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.jobcontrol JobControl constructor, JobControl(String groupName).

Prototype

public JobControl(String groupName) 

Document

Construct a job control for a group of jobs.
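
Before the per-project examples, here is a minimal self-contained sketch of the typical JobControl lifecycle: wrap each JobConf in a jobcontrol Job, declare dependencies, add the jobs to a named JobControl, run it on its own thread, and poll until everything has finished. The group name and the empty placeholder JobConfs are illustrative only; a real job would also configure input/output paths and mapper/reducer classes.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

public class JobControlSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder configurations; real jobs would set paths and classes.
        Job first = new Job(new JobConf());
        Job second = new Job(new JobConf());
        second.addDependingJob(first); // second starts only after first succeeds

        JobControl control = new JobControl("example-group");
        control.addJob(first);
        control.addJob(second);

        // JobControl implements Runnable: run it on its own thread and poll.
        Thread runner = new Thread(control);
        runner.start();
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        control.stop();

        System.exit(control.getFailedJobs().isEmpty() ? 0 : 1);
    }
}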

Usage

From source file:adept.pipeline.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobControl jc = new JobControl("Job Control");

    /** mapping from classname to the actual Hadoop Job instance
        that represents this job */
    Map<String, Job> classnamesToJobs = new HashMap<String, Job>();

    if (args.length < 1) {
        System.out.println("Please provide the configuration file path as a command line argument.");
        System.exit(1);
    }

    // read configuration file
    BufferedReader br = new BufferedReader(
            new InputStreamReader(Reader.findStreamInClasspathOrFileSystem(args[0])));

    String line;

    //add jobs to job control
    while ((line = br.readLine()) != null) {
        String[] temp = line.split("\t");
        if (temp.length != 4) {
            System.out.println("The pipeline configuration file format is incorrect.");
            System.out.println("The correct format is:");
            System.out.println("classname   dependency classnames   inputfile   outputdir");
            System.out.println("Please correct this issue and try again!");
            // abort: the job construction below needs all four fields
            System.exit(1);
        }

        Job job = new Job(MapReduce.getInstance().getConfiguration(temp[2], temp[3], temp[0]));

        // add dependencies
        if (!temp[1].equals("")) {
            String[] dependencies = temp[1].split(",");
            //TODO: Modify this for complex dependencies to 
            // add fusion job
            for (String dependency : dependencies) {
                //System.out.println("dependency is: " + dependency);
                if (classnamesToJobs.get(dependency) == null) {
                    System.out
                            .println("Dependency job not found. Please check your pipeline configuration file. "
                                    + "All dependencies need to have been created first.");
                    System.exit(1);
                }
                job.addDependingJob(classnamesToJobs.get(dependency));
            }
        }

        // populate map
        classnamesToJobs.put(temp[0], job);

        jc.addJob(job);
    }

    handleRun(jc);
}

From source file:com.ebay.erl.mobius.core.MobiusJobRunner.java

License:Apache License

/**
 * Submit the <code>tool</code> with the specified <code>conf</code> and <code>args</code>.
 * <p>
 * 
 * <code>tool</code> can be {@link MobiusJob} or any instance of {@link org.apache.hadoop.util.Tool}.
 * If <code>tool</code> is an instance of {@link MobiusJob}, it will be submitted using
 * {@link JobControl}.  If it's not an instance of {@link MobiusJob}, then it will be submitted
 * using <code>ToolRunner.run(conf, tool, args)</code> directly.
 * 
 */
public static int run(Configuration conf, Tool tool, String[] args) throws Exception {
    if (tool instanceof MobiusJob) {
        MobiusJob mobiusJob = (MobiusJob) tool;

        int exit;
        try {
            exit = ToolRunner.run(conf, tool, args);
        } catch (Throwable t) {
            t.printStackTrace();
            exit = 1;
        }

        if (exit == 0) {
            // setup correctly

            JobControl control = new JobControl("Mobius Job [" + tool.getClass().getCanonicalName() + "]");

            Collection<Job> allJobs = mobiusJob.jobTopology.values();
            control.addJobs(allJobs);

            LOGGER.info(allJobs.size() + " Hadoop job(s) to run.");

            Thread t = new Thread(control);
            t.start();

            StatusCheckingThread statusChecking = new StatusCheckingThread(tool, allJobs, control);
            statusChecking.start();

            statusChecking.join();
            LOGGER.info(" All job(s) done.");

            statusChecking.complete();

            int exitCode = control.getFailedJobs().size() == 0 ? 0 : 1;

            mobiusJob.deleteTempFiles();
            return exitCode;
        } else {
            mobiusJob.deleteTempFiles();
            return exit;
        }

    } else {
        return ToolRunner.run(conf, tool, args);
    }
}
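
A hypothetical way to invoke this runner, shown only for illustration; MyMobiusJob is a placeholder name for a concrete MobiusJob subclass, not a class in the library:

    Configuration conf = new Configuration();
    // MyMobiusJob is assumed to be an application-defined MobiusJob subclass.
    int exitCode = MobiusJobRunner.run(conf, new MyMobiusJob(), args);
    System.exit(exitCode);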

From source file:com.jbw.jobcontrol.Patent.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);

    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class, conf);

    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance();
    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);
    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);
    Thread t = new Thread(jc);
    t.start();
    // wait for both jobs to finish, then stop the JobControl thread
    while (!jc.allFinished()) {
        Thread.sleep(500);
    }
    jc.stop();
    return 0;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Start the program.
 * 
 * @param inputDir The path to the folder containing the raw listening data files.
 * @param outputDir The path for the results to be output to.
 * @throws IOException If an error occurs retrieving data from the file system or an error occurs running the job.
 */
public void start(Path inputDir, Path outputDir) throws IOException {
    FileSystem fs = FileSystem.get(this.conf);

    JobConf uniqueListenersConf = getUniqueListenersJobConf(inputDir);
    Path listenersOutputDir = FileOutputFormat.getOutputPath(uniqueListenersConf);
    Job listenersJob = new Job(uniqueListenersConf);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(uniqueListenersConf))) {
        fs.delete(FileOutputFormat.getOutputPath(uniqueListenersConf), true);
    }

    JobConf sumConf = getSumJobConf(inputDir);
    Path sumOutputDir = FileOutputFormat.getOutputPath(sumConf);
    Job sumJob = new Job(sumConf);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(sumConf))) {
        fs.delete(FileOutputFormat.getOutputPath(sumConf), true);
    }

    // the merge job depends on the other two jobs
    ArrayList<Job> mergeDependencies = new ArrayList<Job>();
    mergeDependencies.add(listenersJob);
    mergeDependencies.add(sumJob);
    JobConf mergeConf = getMergeConf(outputDir, sumOutputDir, listenersOutputDir);
    Job mergeJob = new Job(mergeConf, mergeDependencies);
    // delete any output that might exist from a previous run of this job
    if (fs.exists(FileOutputFormat.getOutputPath(mergeConf))) {
        fs.delete(FileOutputFormat.getOutputPath(mergeConf), true);
    }

    // store the output paths of the intermediate jobs so this can be cleaned up after a successful run
    List<Path> deletePaths = new ArrayList<Path>();
    deletePaths.add(FileOutputFormat.getOutputPath(uniqueListenersConf));
    deletePaths.add(FileOutputFormat.getOutputPath(sumConf));

    JobControl control = new JobControl("TrackStatisticsProgram");
    control.addJob(listenersJob);
    control.addJob(sumJob);
    control.addJob(mergeJob);

    // execute the jobs
    try {
        Thread jobControlThread = new Thread(control, "jobcontrol");
        jobControlThread.start();
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        if (control.getFailedJobs().size() > 0) {
            throw new IOException("One or more jobs failed");
        }
    } catch (InterruptedException e) {
        throw new IOException("Interrupted while waiting for job control to finish", e);
    }

    // remove intermediate output paths
    for (Path deletePath : deletePaths) {
        fs.delete(deletePath, true);
    }
}

From source file:org.apache.pig.test.pigmix.mapreduce.L1.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    JobConf lp = new JobConf(L1.class);
    lp.setJobName("L1 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(IntWritable.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Group.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L1out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L1 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
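            // interrupted while sleeping; loop around and re-check the jobs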
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L10.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    JobConf lp = new JobConf(L10.class);
    lp.setJobName("L10 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(MyType.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    lp.setPartitionerClass(MyPartitioner.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
    // Hardcode the parallel to 40 since MyPartitioner assumes it
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L10 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
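            // interrupted while sleeping; loop around and re-check the jobs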
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L11.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L11.class);
    lp.setJobName("L11 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(ReadPageViews.class);
    lp.setReducerClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/p"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L11.class);
    lu.setJobName("L11 Load Widerow");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadWideRow.class);
    lu.setCombinerClass(ReadWideRow.class);
    lu.setReducerClass(ReadWideRow.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/widerow"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/wr"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadWideRow = new Job(lu);

    JobConf join = new JobConf(L11.class);
    join.setJobName("L11 Union WideRow and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setCombinerClass(Union.class);
    join.setReducerClass(Union.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/p"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/wr"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L11out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadWideRow);

    JobControl jc = new JobControl("L11 join");
    jc.addJob(loadPages);
    jc.addJob(loadWideRow);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
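            // interrupted while sleeping; loop around and re-check the jobs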
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L12.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L12.class);
    lp.setJobName("L12 Find Highest Value Page Per User");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(DoubleWritable.class);
    lp.setMapperClass(HighestValuePagePerUser.class);
    lp.setCombinerClass(HighestValuePagePerUser.class);
    lp.setReducerClass(HighestValuePagePerUser.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/highest_value_page_per_user"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L12.class);
    lu.setJobName("L12 Find Total Timespent per Term");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(LongWritable.class);
    lu.setMapperClass(TotalTimespentPerTerm.class);
    lu.setCombinerClass(TotalTimespentPerTerm.class);
    lu.setReducerClass(TotalTimespentPerTerm.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/total_timespent_per_term"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L12.class);
    join.setJobName("L12 Find Queries Per Action");
    join.setInputFormat(TextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(LongWritable.class);
    join.setMapperClass(QueriesPerAction.class);
    join.setCombinerClass(QueriesPerAction.class);
    join.setReducerClass(QueriesPerAction.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/queries_per_action"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);

    JobControl jc = new JobControl("L12 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
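            // interrupted while sleeping; loop around and re-check the jobs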
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L13.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L13.class);
    lp.setJobName("L13 Load Left Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadLeftPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_left_pages"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L13.class);
    lu.setJobName("L13 Load Right Page Views");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadRightPageViews.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/power_users_samples"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_right_pages"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L13.class);
    join.setJobName("L13 Join Two Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_left_pages"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_right_pages"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L13out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L13 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
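            // interrupted while sleeping; loop around and re-check the jobs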
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : waiting) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : success) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}