List of usage examples for org.apache.hadoop.mapred.JobClient#getAllJobs
public JobStatus[] getAllJobs() throws IOException
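Before the full examples, here is a minimal, self-contained sketch of the call itself: build a JobClient against a classic (MRv1) JobTracker and enumerate every job the tracker knows about. The mapred.job.tracker address is a placeholder assumption for illustration; the examples below instead construct the JobClient from whatever configuration their application already carries.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobStatus;

public class ListAllJobs {
    public static void main(String[] args) throws IOException {
        // Assumed MRv1 setup: point the client at a JobTracker address (hypothetical).
        JobConf conf = new JobConf();
        conf.set("mapred.job.tracker", "localhost:9001");

        JobClient jobClient = new JobClient(conf);

        // getAllJobs() returns every job the JobTracker knows about
        // (preparing, running, and completed), or an empty array.
        JobStatus[] jobs = jobClient.getAllJobs();
        for (JobStatus job : jobs) {
            System.out.println(job.getJobID() + " runState=" + job.getRunState()
                    + " map=" + (int) (job.mapProgress() * 100) + "%"
                    + " reduce=" + (int) (job.reduceProgress() * 100) + "%");
        }
        jobClient.close();
    }
}

Because getAllJobs() mixes submitted, running, and completed jobs in one array, most of the examples below immediately filter the result by job ID or run state.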
From source file: co.cask.cdap.app.mapreduce.MRJobClient.java
License: Apache License
/**
 * @param runId for which information will be returned.
 * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
 * @throws IOException if there is failure to communicate through the JobClient.
 * @throws NotFoundException if a Job with the given runId is not found.
 */
public MRJobInfo getMRJobInfo(Id.Run runId) throws IOException, NotFoundException {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

    JobClient jobClient = new JobClient(hConf);
    JobStatus[] jobs = jobClient.getAllJobs();

    JobStatus thisJob = findJobForRunId(jobs, runId);

    RunningJob runningJob = jobClient.getJob(thisJob.getJobID());
    if (runningJob == null) {
        throw new IllegalStateException(String.format("JobClient returned null for RunId: '%s', JobId: '%s'",
                runId, thisJob.getJobID()));
    }
    Counters counters = runningJob.getCounters();

    TaskReport[] mapTaskReports = jobClient.getMapTaskReports(thisJob.getJobID());
    TaskReport[] reduceTaskReports = jobClient.getReduceTaskReports(thisJob.getJobID());

    return new MRJobInfo(runningJob.mapProgress(), runningJob.reduceProgress(),
            groupToMap(counters.getGroup(TaskCounter.class.getName())), toMRTaskInfos(mapTaskReports),
            toMRTaskInfos(reduceTaskReports), true);
}
From source file: com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License: Open Source License
/**
 * Checks any running/queued jobs and updates their status if they've completed
 */
public void updateJobStatus() {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();

    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = getJobsToMakeComplete();
        while (cmr != null) {
            boolean markedComplete = false;

            //make sure its an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) {
                if (null == jc) {
                    try {
                        jc = new JobClient(getJobClientConnection(), new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....
                        _logger.info("job_update_status_error_title=" + cmr.jobtitle
                                + " job_update_status_error_id=" + cmr._id.toString()
                                + " job_update_status_error_message=Skipping job: " + cmr.jobidS + cmr.jobidN
                                + ", this node does not run mapreduce");
                        setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        cmr = getJobsToMakeComplete();
                        continue;
                    }
                }

                //check if job is done, and update if it is
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS)
                            && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly?";
                        }
                        setJobComplete(cmr, markedComplete, error, j.mapProgress(), j.reduceProgress(),
                                errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if its been longer than 5min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if its been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #2.");
                    }
                }
            } else { // this job hasn't been started yet:
                //check if its been longer than 5min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if its been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #1.");
                }
            }

            //job was not done, need to set flag back
            if (!markedComplete) {
                incompleteJobsMap.put(cmr._id, cmr.jobidS);
            }

            cmr = getJobsToMakeComplete();
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + HarvestExceptionUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
    }

    //set all incomplete jobs back
    for (ObjectId id : incompleteJobsMap.keySet()) {
        BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, incompleteJobsMap.get(id));
        DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                new BasicDBObject(MongoDbManager.set_, update));
    }
}
From source file: com.ikanow.infinit.e.processing.custom.CustomProcessingController.java
License: Open Source License
public boolean checkRunningJobs(CustomMapReduceJobPojo jobOverride) {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();

    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = jobOverride;
        if (null == cmr)
            cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
        else if (null == cmr.jobidS)
            return true;

        while (cmr != null) {
            boolean markedComplete = false;

            //make sure its an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) // non null by construction
            {
                if (null == jc) {
                    try {
                        jc = new JobClient(InfiniteHadoopUtils.getJobClientConnection(prop_custom),
                                new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....
                        _logger.info("job_update_status_error_title=" + cmr.jobtitle
                                + " job_update_status_error_id=" + cmr._id.toString()
                                + " job_update_status_error_message=Skipping job: " + cmr.jobidS + cmr.jobidN
                                + ", this node does not run mapreduce");
                        _statusManager.setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        incompleteJobsMap.remove(cmr._id);
                        cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
                        continue;
                    }
                }

                //check if job is done, and update if it is
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS)
                            && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly? "
                                    + j.getFailureInfo();
                        }
                        _statusManager.setJobComplete(cmr, markedComplete, error, j.mapProgress(),
                                j.reduceProgress(), errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if its been longer than 5min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if its been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        _statusManager.setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error #2.");
                    }
                }
            } else { // this job hasn't been started yet:
                //check if its been longer than 5min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if its been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    _statusManager.setJobComplete(cmr, true, true, -1, -1,
                            "Failed to launch job, unknown error #1.");
                }
            }

            //job was done, remove flag
            if (markedComplete) {
                incompleteJobsMap.remove(cmr._id);
            }

            if (null == jobOverride)
                cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
            else
                cmr = null;
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(err));
    }

    if (null == jobOverride) {
        //set all incomplete jobs' status back
        for (ObjectId id : incompleteJobsMap.keySet()) {
            BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_,
                    incompleteJobsMap.get(id));
            DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                    new BasicDBObject(MongoDbManager.set_, update));
        }
    }
    return incompleteJobsMap.isEmpty();
}
From source file: com.impetus.ankush2.hadoop.monitor.JobStatusProvider.java
License: Open Source License
public Map<String, Object> getJobDetails(JobClient jobClient, String jobId) throws AnkushException {
    String errMsg = "Unable to fetch Hadoop job details, could not connect to Hadoop JobClient.";
    try {
        if (jobClient != null) {
            // Get the jobs that are submitted.
            JobStatus[] jobStatus = jobClient.getAllJobs();
            for (JobStatus jobSts : jobStatus) {
                // (loop body is empty in the published source; see the hedged completion sketch below)
            }
        }
    } catch (Exception e) {
        HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e);
        throw new AnkushException(errMsg);
    }
    return null;
}
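As published, the loop body above is empty and the method always returns null. Purely as a hedged sketch (not the original Ankush implementation), one plausible completion matches the requested jobId against the array from getAllJobs() and packages a few status fields into the returned map:

// Hedged sketch, not the original Ankush code: match jobId against getAllJobs()
// and package a few JobStatus fields into a map.
public Map<String, Object> getJobDetailsSketch(JobClient jobClient, String jobId) throws IOException {
    Map<String, Object> details = new HashMap<String, Object>();
    if (jobClient != null) {
        for (JobStatus jobSts : jobClient.getAllJobs()) {
            if (jobSts.getJobID().toString().equals(jobId)) {
                details.put("jobId", jobSts.getJobID().toString());
                details.put("user", jobSts.getUsername());
                details.put("runState", jobSts.getRunState());          // e.g. JobStatus.RUNNING
                details.put("mapProgress", jobSts.mapProgress());       // 0.0f .. 1.0f
                details.put("reduceProgress", jobSts.reduceProgress()); // 0.0f .. 1.0f
                break;
            }
        }
    }
    return details;
}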
From source file: org.apache.accumulo.server.master.CoordinateRecoveryTask.java
License: Apache License
void cleanupOldJobs() {
    try {
        Configuration conf = CachedConfiguration.getInstance();
        @SuppressWarnings("deprecation")
        JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
        for (JobStatus status : jc.getAllJobs()) {
            if (!status.isJobComplete()) {
                RunningJob job = jc.getJob(status.getJobID());
                if (job.getJobName().equals(LogSort.getJobName())) {
                    log.info("found a running " + job.getJobName());
                    Configuration jobConfig = new Configuration(false);
                    log.info("fetching configuration from " + job.getJobFile());
                    jobConfig.addResource(TraceFileSystem
                            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()))
                            .open(new Path(job.getJobFile())));
                    if (HdfsZooInstance.getInstance().getInstanceID()
                            .equals(jobConfig.get(LogSort.INSTANCE_ID_PROPERTY))) {
                        log.info("Killing job " + job.getID().toString());
                    }
                }
            }
        }
        FileStatus[] children = fs.listStatus(new Path(ServerConstants.getRecoveryDir()));
        if (children != null) {
            for (FileStatus child : children) {
                log.info("Deleting recovery directory " + child);
                fs.delete(child.getPath(), true);
            }
        }
    } catch (IOException e) {
        log.error("Error cleaning up old Log Sort jobs" + e);
    } catch (Exception e) {
        log.error("Unknown error cleaning up old jobs", e);
    }
}
From source file: org.estado.core.JobStatusChecker.java
License: Apache License
public void checkStatus() {
    List<org.estado.spi.JobStatus> jobStatusList = new ArrayList<org.estado.spi.JobStatus>();

    try {
        Configuration conf = new Configuration();
        JobClient client = new JobClient(new JobConf(conf));
        JobStatus[] jobStatuses = client.getAllJobs();
        showFilter();

        int jobCount = 0;
        for (JobStatus jobStatus : jobStatuses) {
            Long lastTaskEndTime = 0L;
            TaskReport[] mapReports = client.getMapTaskReports(jobStatus.getJobID());
            for (TaskReport r : mapReports) {
                if (lastTaskEndTime < r.getFinishTime()) {
                    lastTaskEndTime = r.getFinishTime();
                }
            }
            TaskReport[] reduceReports = client.getReduceTaskReports(jobStatus.getJobID());
            for (TaskReport r : reduceReports) {
                if (lastTaskEndTime < r.getFinishTime()) {
                    lastTaskEndTime = r.getFinishTime();
                }
            }
            client.getSetupTaskReports(jobStatus.getJobID());
            client.getCleanupTaskReports(jobStatus.getJobID());

            String jobId = jobStatus.getJobID().toString();
            String jobName = client.getJob(jobStatus.getJobID()).getJobName();
            Long startTime = jobStatus.getStartTime();
            String user = jobStatus.getUsername();
            int mapProgress = (int) (jobStatus.mapProgress() * 100);
            int reduceProgress = (int) (jobStatus.reduceProgress() * 100);
            org.estado.spi.JobStatus jobStat = null;
            ++jobCount;

            int runState = jobStatus.getRunState();
            switch (runState) {
            case JobStatus.SUCCEEDED:
                if (filter.contains("s")) {
                    Long duration = lastTaskEndTime - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "completed");
                    ++sCount;
                }
                break;

            case JobStatus.RUNNING:
                if (filter.contains("r")) {
                    long duration = System.currentTimeMillis() - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "running");
                    ++rCount;
                }
                break;

            case JobStatus.FAILED:
                if (filter.contains("f")) {
                    long duration = lastTaskEndTime - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "failed");
                    RunningJob job = client.getJob(jobStatus.getJobID());
                    jobStat.setJobTasks(getTaskDetails(job));
                    ++fCount;
                }
                break;

            case JobStatus.PREP:
                if (filter.contains("p")) {
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, null, null,
                            null, 0, 0, "preparing");
                    ++pCount;
                }
                break;

            case JobStatus.KILLED:
                if (filter.contains("k")) {
                    long duration = lastTaskEndTime - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "killed");
                    RunningJob job = client.getJob(jobStatus.getJobID());
                    jobStat.setJobTasks(getTaskDetails(job));
                    ++kCount;
                }
                break;
            }

            // Only collect jobs that passed the filter; adding nulls would NPE in the loops below.
            if (jobStat != null) {
                jobStatusList.add(jobStat);
            }
        }

        //get counters
        for (org.estado.spi.JobStatus jobStat : jobStatusList) {
            if (!jobStat.getStatus().equals("preparing")) {
                List<JobCounterGroup> counterGroups = getJobCounters(jobStat.getJobId());
                jobStat.setCounterGroups(counterGroups);
                //additional data from counters
                setJobInfo(jobStat);
            }
        }

        //publish to all consumers
        for (JobStatusConsumer consumer : consumers) {
            consumer.handle(jobStatusList);
        }
        showJobCounts();
    } catch (Exception ex) {
        System.out.println("Jobs status checker failed: " + ex.getMessage());
    }
}
From source file: org.godhuli.rhipe.FileUtils.java
License: Apache License
public long getStart(org.apache.hadoop.mapred.JobClient jc, org.apache.hadoop.mapred.JobID jj)
        throws Exception {
    // this is not needed if i can get a reference to JobTracker (which returns the JobStatus for a given JobID)
    org.apache.hadoop.mapred.JobStatus[] jbs = jc.getAllJobs();
    for (int i = 0; i < jbs.length; i++) {
        if (jbs[i].getJobID().toString().equals(jj.toString())) {
            return (jbs[i].getStartTime());
        }
    }
    return (0);
}
From source file: org.openflamingo.engine.monitoring.hadoop.JobTrackerMonitor.java
License: Apache License
public void printJobs() throws IOException {
    JobConf conf = new JobConf();
    conf.set("mapred.job.tracker", "localhost:9001");
    JobClient jobClient = new JobClient(conf);

    JobStatus[] allJobs = jobClient.getAllJobs();
    if (allJobs != null) {
        for (JobStatus status : allJobs) {
            System.out.println(status.getJobID());
            System.out.println(status.getSchedulingInfo());
        }
    }

    System.out.println(jobClient.getClusterStatus().getMapTasks());

    JobQueueInfo[] queues = jobClient.getQueues();
    if (queues != null)
        for (JobQueueInfo queue : queues) {
            System.out.println(queue.getQueueName());
            System.out.println(queue.getSchedulingInfo());
            System.out.println(queue.getQueueState());
        }

    JobStatus[] jobStatuses = jobClient.jobsToComplete();
    if (jobStatuses != null)
        for (JobStatus jobStatus : jobStatuses) {
            System.out.println(jobStatus.getJobID().getId());
            System.out.println(jobStatus.getSchedulingInfo());
        }
}
From source file: org.sleuthkit.web.sampleapp.server.SampleServiceImpl.java
License: Open Source License
/**
 * get job data (one row per job) from hadoop
 * @return array of jobs (one row per job). Each row is an array of strings (one per column), matching columns[].
 * @throws Exception
 */
public String[][] getData() throws IllegalArgumentException {
    List<String[]> result = new ArrayList<String[]>();

    //first we add "ghost jobs" based on image ids we have - update these if we have real jobs
    addGhostJobs(result);

    Configuration conf = new Configuration();
    JobClient jobClient;
    try {
        jobClient = new JobClient(new InetSocketAddress("localhost", 8021), conf);
        jobClient.setConf(conf); // Bug in constructor, doesn't set conf.

        for (JobStatus js : jobClient.getAllJobs()) {
            RunningJob rj = jobClient.getJob(js.getJobID());
            if (rj == null)
                continue;
            String jobName = rj.getJobName();
            if (jobName == null)
                continue;

            //extract TP$imageHash$imageId$step from jobName - we filter on image hash
            if (jobName.startsWith("TP") && !jobName.contains("_TEST")) {
                String[] names = jobName.split("\\$");
                if (names.length != 4) {
                    System.err.println("Invalid job name of TP job " + jobName);
                }
                processJob(result, names, js, rj);
            }
        }

        //sort descending by time
        Collections.sort(result, new Comparator<String[]>() {
            public int compare(String[] a1, String[] a2) {
                return a2[TIME_START].compareTo(a1[TIME_START]);
            }
        });
        return result.toArray(new String[0][0]);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}