List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob getJob
public synchronized Job getJob()
From source file:main.Driver.java
/**
 * Creates a {@link ControlledJob} from the shared configuration that reads
 * key/value text from {@code input} and writes to {@code output} through
 * {@link LicenseOutputFormat}.
 *
 * <p>The output file name is stored in the shared configuration under
 * {@code LicenseOutputFormat.OUTPUT_FILE_NAME} before the job is created.
 * No mapper or reducer class is set by this method.
 *
 * @param input          path added as the job's input
 * @param output         path set as the job's output directory
 * @param outputFileName value stored under {@code LicenseOutputFormat.OUTPUT_FILE_NAME}
 * @return the configured controlled job (not yet submitted)
 * @throws Exception if creating or configuring the job fails
 */
private ControlledJob setSortingJob(String input, String output, String outputFileName) throws Exception {
    _configuration.set(LicenseOutputFormat.OUTPUT_FILE_NAME, outputFileName);

    ControlledJob sortingJob = new ControlledJob(_configuration);
    Job hadoopJob = sortingJob.getJob();

    // Key/value types emitted by the job.
    hadoopJob.setOutputKeyClass(Text.class);
    hadoopJob.setOutputValueClass(Text.class);

    // Input side.
    hadoopJob.setInputFormatClass(KeyValueTextInputFormat.class);
    Path inputPath = new Path(input);
    FileInputFormat.addInputPath(hadoopJob, inputPath);

    // Output side.
    hadoopJob.setOutputFormatClass(LicenseOutputFormat.class);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(hadoopJob, outputPath);

    hadoopJob.setJarByClass(Driver.class);
    return sortingJob;
}
From source file:main.Driver.java
/**
 * Creates a {@link ControlledJob} that reads {@code input1} and {@code input2}
 * through {@link CompositeInputFormat} using an {@code "inner"} join expression
 * and writes text output to {@code output}.
 *
 * <p>Before the job is created, the shared configuration receives the writable
 * class names under {@code LicenseOutputFormat.NAMES} and
 * {@code LicenseOutputFormat.LICENSE}, and the join expression under
 * {@code mapreduce.join.expr}. The join expression is also printed to standard
 * output.
 *
 * @param input1 first path passed to the join expression
 * @param input2 second path passed to the join expression
 * @param output path set as the job's output directory
 * @return the configured controlled job (not yet submitted)
 * @throws IOException if creating or configuring the job fails
 */
private ControlledJob setMRJob(String input1, String input2, String output) throws IOException {
    String namesClass = LicenseNameWritable.class.getName();
    String licenseClass = LicenseTypeWritable.class.getName();
    _configuration.set(LicenseOutputFormat.NAMES, namesClass);
    _configuration.set(LicenseOutputFormat.LICENSE, licenseClass);

    Path firstInput = new Path(input1);
    Path secondInput = new Path(input2);
    String joinExpression =
            CompositeInputFormat.compose("inner", LicenseInputFormat.class, firstInput, secondInput);
    System.out.println(joinExpression);
    _configuration.set("mapreduce.join.expr", joinExpression);

    // The job must be created only after every configuration entry above is in place.
    ControlledJob joinJob = new ControlledJob(_configuration);
    Job hadoopJob = joinJob.getJob();

    hadoopJob.setOutputKeyClass(Text.class);
    hadoopJob.setOutputValueClass(Text.class);

    hadoopJob.setMapperClass(MapSideMapper.class);
    hadoopJob.setReducerClass(MapSideReducer.class);

    hadoopJob.setInputFormatClass(CompositeInputFormat.class);
    hadoopJob.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, new Path(output));

    hadoopJob.setJarByClass(Driver.class);
    return joinJob;
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
/** * Run all jobs added to JobControl./*from w ww . j a va2 s. co m*/ */ public void run() throws IOException { // Initially, all jobs are in wait state. List<ControlledJob> jobsWithoutIds = this.jc.getWaitingJobList(); int totalNeededMRJobs = jobsWithoutIds.size(); LOG.info("{} map-reduce job(s) waiting for submission.", jobsWithoutIds.size()); Thread jcThread = new Thread(this.jc, "Guagua-MapReduce-JobControl"); jcThread.start(); JobClient jobClient = new JobClient(new JobConf(new Configuration())); double lastProg = -1; Set<String> sucessfulJobs = new HashSet<String>(); while (!this.jc.allFinished()) { try { jcThread.join(1000); } catch (InterruptedException ignore) { Thread.currentThread().interrupt(); } List<ControlledJob> jobsAssignedIdInThisRun = new ArrayList<ControlledJob>(totalNeededMRJobs); for (ControlledJob job : jobsWithoutIds) { if (job.getJob().getJobID() != null) { jobsAssignedIdInThisRun.add(job); LOG.info("Job {} is started.", job.getJob().getJobID().toString()); } else { // This job is not assigned an id yet. 
} } jobsWithoutIds.removeAll(jobsAssignedIdInThisRun); List<ControlledJob> successfulJobs = jc.getSuccessfulJobList(); for (ControlledJob controlledJob : successfulJobs) { String jobId = controlledJob.getJob().getJobID().toString(); if (!sucessfulJobs.contains(jobId)) { LOG.info("Job {} is successful.", jobId); sucessfulJobs.add(jobId); } } List<ControlledJob> failedJobs = jc.getFailedJobList(); for (ControlledJob controlledJob : failedJobs) { String failedJobId = controlledJob.getJob().getJobID().toString(); if (!this.failedCheckingJobs.contains(failedJobId)) { this.failedCheckingJobs.add(failedJobId); String jobName = controlledJob.getJob().getJobName(); Integer jobIndex = this.jobIndexMap.get(jobName); Integer runTimes = this.jobRunningTimes.get(jobIndex); if (runTimes <= 1) { LOG.warn("Job {} is failed, will be submitted again.", jobName); Job newJob = createJob(this.jobIndexParams.get(jobIndex)); this.jc.addJob(new ControlledJob(newJob, null)); this.jobRunningTimes.put(jobIndex, runTimes + 1); this.jobIndexMap.put(newJob.getJobName(), jobIndex); jobsWithoutIds = this.jc.getWaitingJobList(); } else { LOG.warn("Job {} is failed twice, will not be submitted again.", jobName); } } } double prog = calculateProgress(jc, jobClient) / totalNeededMRJobs; notifyProgress(prog, lastProg); lastProg = prog; try { Thread.sleep(2 * 1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } List<ControlledJob> successfulJobs = jc.getSuccessfulJobList(); LOG.info("Sucessful jobs:"); for (ControlledJob controlledJob : successfulJobs) { LOG.info("Job: {} ", controlledJob); } if (totalNeededMRJobs == successfulJobs.size()) { LOG.info("Guagua jobs: 100% complete"); // add failed jobs to debug since all jobs are finished. 
List<ControlledJob> failedJobs = jc.getFailedJobList(); if (failedJobs != null && failedJobs.size() > 0) { LOG.info("Failed jobs:"); for (ControlledJob controlledJob : failedJobs) { LOG.debug("Job: {} ", controlledJob); } } } else { List<ControlledJob> failedJobs = jc.getFailedJobList(); if (failedJobs != null && failedJobs.size() > 0) { LOG.info("Failed jobs:"); for (ControlledJob controlledJob : failedJobs) { LOG.warn("Job: {} ", controlledJob); } } } this.jc.stop(); }
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
/**
 * Returns the progress of a single job that is part of a submitted JobControl object.
 *
 * <p>The value is for this job only; a caller combining several jobs has to scale
 * it down by the number of jobs present in the JobControl. Only the map progress
 * reported by the running job is used.
 *
 * @param cjob
 *            - The job for which progress is required
 * @param jobClient
 *            - the JobClient used to look the job up by its id
 * @return the running job's map progress when the job client knows the job;
 *         otherwise {@code 1} if the controlled job's state is {@code SUCCESS},
 *         and {@code 0} in any other state
 * @throws IOException
 *             In case any IOException connecting to JobTracker.
 */
protected double progressOfRunningJob(ControlledJob cjob, JobClient jobClient) throws IOException {
    String jobId = cjob.getJob().getJobID().toString();
    @SuppressWarnings("deprecation")
    RunningJob runningJob = jobClient.getJob(jobId);

    if (runningJob != null) {
        return runningJob.mapProgress();
    }

    // The job client no longer (or not yet) knows the job: fall back to its final state.
    boolean succeeded = cjob.getJobState() == ControlledJob.State.SUCCESS;
    return succeeded ? 1 : 0;
}