Example usage for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob getJob

List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob getJob

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.getJob().

Prototype

public synchronized Job getJob() 

Source Link

Usage

From source file:main.Driver.java

/**
 * Creates the controlled job for the sorting step.
 *
 * <p>The output file name is stored in the shared {@code _configuration} under
 * {@link LicenseOutputFormat#OUTPUT_FILE_NAME} before the {@link ControlledJob} is created
 * from that configuration.
 *
 * @param input          path the job reads from, consumed via {@link KeyValueTextInputFormat}
 * @param output         output directory of the job
 * @param outputFileName value stored under {@code LicenseOutputFormat.OUTPUT_FILE_NAME}
 * @return the configured, not yet submitted, controlled job
 * @throws Exception if the job cannot be created or configured
 */
private ControlledJob setSortingJob(String input, String output, String outputFileName) throws Exception {
    _configuration.set(LicenseOutputFormat.OUTPUT_FILE_NAME, outputFileName);

    final ControlledJob sortingJob = new ControlledJob(_configuration);
    final Job hadoopJob = sortingJob.getJob();
    hadoopJob.setJarByClass(Driver.class);

    // Key/value types emitted by the job.
    hadoopJob.setOutputKeyClass(Text.class);
    hadoopJob.setOutputValueClass(Text.class);

    // Input side.
    final Path inputPath = new Path(input);
    hadoopJob.setInputFormatClass(KeyValueTextInputFormat.class);
    FileInputFormat.addInputPath(hadoopJob, inputPath);

    // Output side.
    final Path outputPath = new Path(output);
    hadoopJob.setOutputFormatClass(LicenseOutputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, outputPath);

    return sortingJob;
}

From source file:main.Driver.java

/**
 * Creates the controlled job that joins two inputs with a {@link CompositeInputFormat}.
 *
 * <p>An "inner" join expression over both inputs (each read with {@link LicenseInputFormat})
 * is composed, printed to standard output and stored in the shared {@code _configuration}
 * under {@code mapreduce.join.expr} before the {@link ControlledJob} is created from it.
 *
 * @param input1 first input path of the join
 * @param input2 second input path of the join
 * @param output output directory of the job
 * @return the configured, not yet submitted, controlled job
 * @throws IOException if the job cannot be created
 */
private ControlledJob setMRJob(String input1, String input2, String output) throws IOException {
    // Writable class names looked up through the LicenseOutputFormat keys.
    _configuration.set(LicenseOutputFormat.NAMES, LicenseNameWritable.class.getName());
    _configuration.set(LicenseOutputFormat.LICENSE, LicenseTypeWritable.class.getName());

    final Path firstInput = new Path(input1);
    final Path secondInput = new Path(input2);
    final String joinExpr = CompositeInputFormat.compose("inner", LicenseInputFormat.class, firstInput,
            secondInput);
    System.out.println(joinExpr);
    _configuration.set("mapreduce.join.expr", joinExpr);

    final ControlledJob joinJob = new ControlledJob(_configuration);
    final Job hadoopJob = joinJob.getJob();

    // Processing classes.
    hadoopJob.setMapperClass(MapSideMapper.class);
    hadoopJob.setReducerClass(MapSideReducer.class);

    // Key/value types emitted by the job.
    hadoopJob.setOutputKeyClass(Text.class);
    hadoopJob.setOutputValueClass(Text.class);

    // Input and output formats.
    hadoopJob.setInputFormatClass(CompositeInputFormat.class);
    hadoopJob.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(hadoopJob, new Path(output));
    hadoopJob.setJarByClass(Driver.class);

    return joinJob;
}

From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java

License:Apache License

/**
 * Run all jobs added to JobControl./*from  w ww  . j  a va2 s. co m*/
 */
public void run() throws IOException {
    // Initially, all jobs are in wait state.
    List<ControlledJob> jobsWithoutIds = this.jc.getWaitingJobList();
    int totalNeededMRJobs = jobsWithoutIds.size();
    LOG.info("{} map-reduce job(s) waiting for submission.", jobsWithoutIds.size());
    Thread jcThread = new Thread(this.jc, "Guagua-MapReduce-JobControl");
    jcThread.start();

    JobClient jobClient = new JobClient(new JobConf(new Configuration()));
    double lastProg = -1;

    Set<String> sucessfulJobs = new HashSet<String>();

    while (!this.jc.allFinished()) {
        try {
            jcThread.join(1000);
        } catch (InterruptedException ignore) {
            Thread.currentThread().interrupt();
        }
        List<ControlledJob> jobsAssignedIdInThisRun = new ArrayList<ControlledJob>(totalNeededMRJobs);

        for (ControlledJob job : jobsWithoutIds) {
            if (job.getJob().getJobID() != null) {
                jobsAssignedIdInThisRun.add(job);
                LOG.info("Job {} is started.", job.getJob().getJobID().toString());
            } else {
                // This job is not assigned an id yet.
            }
        }
        jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

        List<ControlledJob> successfulJobs = jc.getSuccessfulJobList();
        for (ControlledJob controlledJob : successfulJobs) {
            String jobId = controlledJob.getJob().getJobID().toString();
            if (!sucessfulJobs.contains(jobId)) {
                LOG.info("Job {} is successful.", jobId);
                sucessfulJobs.add(jobId);
            }
        }

        List<ControlledJob> failedJobs = jc.getFailedJobList();
        for (ControlledJob controlledJob : failedJobs) {
            String failedJobId = controlledJob.getJob().getJobID().toString();
            if (!this.failedCheckingJobs.contains(failedJobId)) {
                this.failedCheckingJobs.add(failedJobId);
                String jobName = controlledJob.getJob().getJobName();
                Integer jobIndex = this.jobIndexMap.get(jobName);
                Integer runTimes = this.jobRunningTimes.get(jobIndex);
                if (runTimes <= 1) {
                    LOG.warn("Job {} is failed, will be submitted again.", jobName);
                    Job newJob = createJob(this.jobIndexParams.get(jobIndex));
                    this.jc.addJob(new ControlledJob(newJob, null));
                    this.jobRunningTimes.put(jobIndex, runTimes + 1);
                    this.jobIndexMap.put(newJob.getJobName(), jobIndex);
                    jobsWithoutIds = this.jc.getWaitingJobList();
                } else {
                    LOG.warn("Job {} is failed twice, will not be submitted again.", jobName);
                }
            }
        }
        double prog = calculateProgress(jc, jobClient) / totalNeededMRJobs;
        notifyProgress(prog, lastProg);
        lastProg = prog;

        try {
            Thread.sleep(2 * 1000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    List<ControlledJob> successfulJobs = jc.getSuccessfulJobList();

    LOG.info("Sucessful jobs:");
    for (ControlledJob controlledJob : successfulJobs) {
        LOG.info("Job: {} ", controlledJob);
    }
    if (totalNeededMRJobs == successfulJobs.size()) {
        LOG.info("Guagua jobs: 100% complete");
        // add failed jobs to debug since all jobs are finished.
        List<ControlledJob> failedJobs = jc.getFailedJobList();
        if (failedJobs != null && failedJobs.size() > 0) {
            LOG.info("Failed jobs:");
            for (ControlledJob controlledJob : failedJobs) {
                LOG.debug("Job: {} ", controlledJob);
            }
        }
    } else {
        List<ControlledJob> failedJobs = jc.getFailedJobList();
        if (failedJobs != null && failedJobs.size() > 0) {
            LOG.info("Failed jobs:");
            for (ControlledJob controlledJob : failedJobs) {
                LOG.warn("Job: {} ", controlledJob);
            }
        }
    }
    this.jc.stop();
}

From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java

License:Apache License

/**
 * Returns the progress of a single job that is part of a submitted JobControl object. The
 * value is for this job only, so callers have to scale it down by the number of jobs in the
 * JobControl.
 *
 * <p>When the JobClient knows the job, its map progress is returned. When the JobClient
 * returns no running job, the result is 1 if the controlled job is in state
 * {@code SUCCESS} and 0 otherwise.
 *
 * @param cjob
 *            - The Job for which progress is required
 * @param jobClient
 *            - the JobClient to which it has been submitted
 * @return the map progress of this job, or 1 / 0 as described above
 * @throws IOException
 *             In case any IOException connecting to JobTracker.
 */
protected double progressOfRunningJob(ControlledJob cjob, JobClient jobClient) throws IOException {
    final String jobId = cjob.getJob().getJobID().toString();
    @SuppressWarnings("deprecation")
    RunningJob runningJob = jobClient.getJob(jobId);

    if (runningJob != null) {
        return runningJob.mapProgress();
    }

    // The JobClient does not know the job: decide from the controlled job's own state.
    if (cjob.getJobState() == ControlledJob.State.SUCCESS) {
        return 1;
    }
    return 0;
}