List of usage examples for org.apache.hadoop.mapred.JobStatus.getJobID()
public JobID getJobID()
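For orientation before the examples: getJobID() is normally reached by enumerating cluster jobs through a JobClient and reading the ID off each JobStatus. Below is a minimal, self-contained sketch of that pattern; the default JobConf connection setup and the printed fields are illustrative assumptions, not taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobStatus;

public class GetJobIdExample {
    public static void main(String[] args) throws IOException {
        // Assumes the JobTracker address comes from the default configuration
        // (mapred.job.tracker); point this at your own cluster.
        JobClient jobClient = new JobClient(new JobConf());
        try {
            for (JobStatus status : jobClient.getAllJobs()) {
                JobID jobId = status.getJobID();
                // A JobID decomposes into a jobtracker identifier and a sequence number,
                // which is how several examples below match jobs (getJtIdentifier()/getId()).
                System.out.println(jobId + " -> " + jobId.getJtIdentifier() + " #" + jobId.getId());
            }
        } finally {
            jobClient.close();
        }
    }
}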
From source file: co.cask.cdap.app.mapreduce.MRJobClient.java
License: Apache License
/**
 * @param runId for which information will be returned.
 * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
 * @throws IOException if there is failure to communicate through the JobClient.
 * @throws NotFoundException if a Job with the given runId is not found.
 */
public MRJobInfo getMRJobInfo(Id.Run runId) throws IOException, NotFoundException {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

    JobClient jobClient = new JobClient(hConf);
    JobStatus[] jobs = jobClient.getAllJobs();

    JobStatus thisJob = findJobForRunId(jobs, runId);

    RunningJob runningJob = jobClient.getJob(thisJob.getJobID());
    if (runningJob == null) {
        throw new IllegalStateException(String.format("JobClient returned null for RunId: '%s', JobId: '%s'",
                runId, thisJob.getJobID()));
    }
    Counters counters = runningJob.getCounters();

    TaskReport[] mapTaskReports = jobClient.getMapTaskReports(thisJob.getJobID());
    TaskReport[] reduceTaskReports = jobClient.getReduceTaskReports(thisJob.getJobID());

    return new MRJobInfo(runningJob.mapProgress(), runningJob.reduceProgress(),
            groupToMap(counters.getGroup(TaskCounter.class.getName())),
            toMRTaskInfos(mapTaskReports), toMRTaskInfos(reduceTaskReports), true);
}
From source file: com.google.HadoopMonitor.java
License: Open Source License
public void run() {
    while (true) { // Keep trying if there's a problem
        try {
            ProgressResult prog = new ProgressResult();

            for (JobStatus job : jobClient.getAllJobs()) {
                String key = job.getJobID().toString();
                if (jobs.containsKey(key)) {
                    jobs.get(key).update(job);
                } else {
                    jobs.put(key, new JobState(job));
                }
            }

            prog.jobs = new ArrayList<JobState>(jobs.values());
            String data = "data=" + URLEncoder.encode(prog.toString(), "UTF-8");
            client.send("https://coordinator:8888/hadoop/status_update", data);
        } catch (IOException e) {
            System.err.println("Couldn't get or send progress: " + e);
        }

        try {
            Thread.sleep(SLEEP_TIME);
        } catch (InterruptedException e) {
        }
    }
}
From source file: com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java
License: Open Source License
/**
 * Checks any running/queued jobs and updates their status if they've completed
 */
public void updateJobStatus() {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();
    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = getJobsToMakeComplete();
        while (cmr != null) {
            boolean markedComplete = false;
            //make sure it's an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) {
                if (null == jc) {
                    try {
                        jc = new JobClient(getJobClientConnection(), new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....
                        _logger.info("job_update_status_error_title=" + cmr.jobtitle
                                + " job_update_status_error_id=" + cmr._id.toString()
                                + " job_update_status_error_message=Skipping job: " + cmr.jobidS + cmr.jobidN
                                + ", this node does not run mapreduce");
                        setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        cmr = getJobsToMakeComplete();
                        continue;
                    }
                }

                //check if job is done, and update if it is
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS) && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly?";
                        }
                        setJobComplete(cmr, markedComplete, error, j.mapProgress(), j.reduceProgress(), errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if it's been longer than 5min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if it's been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #2.");
                    }
                }
            } else { // this job hasn't been started yet:
                //check if it's been longer than 5min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if it's been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #1.");
                }
            }
            //job was not done, need to set flag back
            if (!markedComplete) {
                incompleteJobsMap.put(cmr._id, cmr.jobidS);
            }
            cmr = getJobsToMakeComplete();
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + HarvestExceptionUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
    }

    //set all incomplete jobs back
    for (ObjectId id : incompleteJobsMap.keySet()) {
        BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, incompleteJobsMap.get(id));
        DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                new BasicDBObject(MongoDbManager.set_, update));
    }
}
From source file: com.ikanow.infinit.e.processing.custom.CustomProcessingController.java
License: Open Source License
public boolean checkRunningJobs(CustomMapReduceJobPojo jobOverride) {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();
    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = jobOverride;
        if (null == cmr)
            cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
        else if (null == cmr.jobidS)
            return true;

        while (cmr != null) {
            boolean markedComplete = false;
            //make sure it's an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) // non null by construction
            {
                if (null == jc) {
                    try {
                        jc = new JobClient(InfiniteHadoopUtils.getJobClientConnection(prop_custom), new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....
                        _logger.info("job_update_status_error_title=" + cmr.jobtitle
                                + " job_update_status_error_id=" + cmr._id.toString()
                                + " job_update_status_error_message=Skipping job: " + cmr.jobidS + cmr.jobidN
                                + ", this node does not run mapreduce");
                        _statusManager.setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        incompleteJobsMap.remove(cmr._id);
                        cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
                        continue;
                    }
                }

                //check if job is done, and update if it is
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS) && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly? "
                                    + j.getFailureInfo();
                        }
                        _statusManager.setJobComplete(cmr, markedComplete, error, j.mapProgress(), j.reduceProgress(), errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if it's been longer than 5min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if it's been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        _statusManager.setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #2.");
                    }
                }
            } else { // this job hasn't been started yet:
                //check if it's been longer than 5min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if it's been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    _statusManager.setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #1.");
                }
            }
            //job was done, remove flag
            if (markedComplete) {
                incompleteJobsMap.remove(cmr._id);
            }
            if (null == jobOverride)
                cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
            else
                cmr = null;
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(err));
    }
    if (null == jobOverride) {
        //set all incomplete jobs' status back
        for (ObjectId id : incompleteJobsMap.keySet()) {
            BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, incompleteJobsMap.get(id));
            DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                    new BasicDBObject(MongoDbManager.set_, update));
        }
    }
    return incompleteJobsMap.isEmpty();
}
From source file: com.impetus.ankush2.hadoop.monitor.JobStatusProvider.java
License: Open Source License
/**
 * @param jobSts the job status to report on
 * @return a map of job-level status details
 * @throws IOException
 */
private Map<String, Object> getJobReport(JobStatus jobSts) throws IOException {
    // Creating an empty map for storing job information
    Map<String, Object> jobReport = new HashMap<String, Object>();
    // Returns the jobid of the Job
    org.apache.hadoop.mapred.JobID jobId = jobSts.getJobID();
    // Get a RunningJob object to track an ongoing Map-Reduce job.
    RunningJob job = jobClient.getJob(jobId);
    String jobName = "";
    if (job != null) {
        // Get the name of the job.
        jobName = job.getJobName();
    }
    // Percentage of progress in maps
    float mapProgress = jobSts.mapProgress() * 100;
    // Percentage of progress in reduce
    float reduceProgress = jobSts.reduceProgress() * 100;
    int mapTotal = 0;
    int reduceTotal = 0;
    int mapComp = 0;
    int reduceComp = 0;
    // Count of completed map and reduce tasks
    try {
        // Get the information of the current state of the map tasks of a job
        TaskReport[] mapTaskReports = jobClient.getMapTaskReports(jobId);
        // Get the total map count
        mapTotal = mapTaskReports.length;
        // Iterating over the map tasks
        for (TaskReport taskReport : mapTaskReports) {
            // The current state of a map TaskInProgress as seen by the JobTracker.
            TIPStatus currentStatus = taskReport.getCurrentStatus();
            if (currentStatus == TIPStatus.COMPLETE) {
                mapComp++;
            }
        }
        // Get the information of the current state of the reduce tasks of a job.
        TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobId);
        // Get the total reduce count
        reduceTotal = reduceTaskReport.length;
        // Iterating over the reduce tasks
        for (TaskReport taskReport : reduceTaskReport) {
            // The current state of a reduce TaskInProgress as seen by the JobTracker.
            TIPStatus currentStatus = taskReport.getCurrentStatus();
            if (currentStatus == TIPStatus.COMPLETE) {
                reduceComp++;
            }
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
    // Percentage of progress in setup
    float setupProgress = jobSts.setupProgress() * 100;
    // The progress made on cleanup
    float cleanupProgress = jobSts.cleanupProgress() * 100;
    // Gets any available info on the reason of failure of the job.
    // Returns the diagnostic information on why a job might have failed.
    String failureInfo = jobSts.getFailureInfo();

    // Putting job status information in the map
    jobReport.put("jobId", jobId.toString());
    jobReport.put("jobName", jobName);
    jobReport.put("jobPriority", jobSts.getJobPriority().toString());
    jobReport.put("jobStartTime", jobSts.getStartTime());
    jobReport.put("userName", jobSts.getUsername());
    jobReport.put("jobComplete", jobSts.isJobComplete());
    jobReport.put("mapProgress", mapProgress);
    jobReport.put("reduceProgress", reduceProgress);
    jobReport.put("mapTotal", mapTotal);
    jobReport.put("reduceTotal", reduceTotal);
    jobReport.put("mapCompleted", mapComp);
    jobReport.put("reduceCompleted", reduceComp);
    jobReport.put("setupProgress", setupProgress);
    jobReport.put("cleanupProgress", cleanupProgress);
    jobReport.put("schedulingInfo", jobSts.getSchedulingInfo());
    jobReport.put("jobState", JobStatus.getJobRunState(jobSts.getRunState()));
    jobReport.put("failureInfo", failureInfo);
    if (job != null) { // guard: job can be null here (see the jobName check above)
        jobReport.put("jobFile", job.getJobFile());
        jobReport.put("trackingURL", job.getTrackingURL());
    }
    jobReport.putAll(getDetailedJobReport(jobId));
    return jobReport;
}
From source file: dataload.LogFetchJobTracker.java
License: Apache License
/**
 * This method is the method run by the timer
 */
public void run() {
    try {
        for (JobStatus stat : client.getAllJobs()) {
            if (stat.getUsername().equals("data_svc") && !jobsSeen.contains(stat.getJobID().toString())) {
                totalTime = 0;
                makeTable(stat.getJobID());
                writeMapAndReduceCounters(stat.getJobID());
                getJobParameters(stat.getJobID());

                jobsSeen.add(stat.getJobID().toString());
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    }
}
From source file: eu.scape_project.tb.hadoopjobtracker.HadoobJobTrackerClient.java
License: Apache License
public HDJobStatus[] UUIDJobs(String UUID) {
    JobStatus[] allHadoopJobs = null;
    try {
        allHadoopJobs = myJobClient.getAllJobs();
    } catch (IOException ex) {
        logger.error("Error retrieving ALL jobs from JobTracker. ERR: " + ex.getMessage());
    }
    if (allHadoopJobs == null) {
        // getAllJobs() failed above; bail out rather than dereference null below
        return new HDJobStatus[0];
    }

    int numberOfJobs = allHadoopJobs.length;
    logger.info("Number of ALL jobs on the cluster: " + numberOfJobs);
    logger.info("Searching for jobs containing the UUID: '" + UUID + "'");

    ArrayList<HDJobStatus> allUUIDJobs = new ArrayList<HDJobStatus>();
    if (numberOfJobs > 0) {
        for (JobStatus singleJobStatus : allHadoopJobs) {
            JobID singleJobID = singleJobStatus.getJobID();
            String singleJobName = "N/A";
            try {
                singleJobName = getJobName(myJobClient, singleJobID);
            } catch (IOException ex) {
                logger.error("Error retrieving jobName for job " + singleJobID + ". ERR: " + ex.getMessage());
            }
            if ((singleJobName.toLowerCase().indexOf(UUID.toLowerCase()) >= 0) || (UUID.equals(""))) {
                HDJobStatus newJobStatus = new HDJobStatus();
                newJobStatus.setJobID(singleJobStatus.getJobID().toString());
                newJobStatus.setJobIsComplete(singleJobStatus.isJobComplete());
                newJobStatus.setJobUserName(singleJobStatus.getUsername());
                newJobStatus.setJobFailureInfo(singleJobStatus.getFailureInfo());
                newJobStatus.setJobName(singleJobName);
                newJobStatus.setJobMapProgress(Math.round(singleJobStatus.mapProgress() * 100));
                newJobStatus.setJobReduceProgress(Math.round(singleJobStatus.reduceProgress() * 100));
                allUUIDJobs.add(newJobStatus);
            }
        }
    }
    return allUUIDJobs.toArray(new HDJobStatus[allUUIDJobs.size()]);
}
From source file: org.apache.accumulo.server.master.CoordinateRecoveryTask.java
License: Apache License
void cleanupOldJobs() {
    try {
        Configuration conf = CachedConfiguration.getInstance();
        @SuppressWarnings("deprecation")
        JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
        for (JobStatus status : jc.getAllJobs()) {
            if (!status.isJobComplete()) {
                RunningJob job = jc.getJob(status.getJobID());
                if (job.getJobName().equals(LogSort.getJobName())) {
                    log.info("found a running " + job.getJobName());
                    Configuration jobConfig = new Configuration(false);
                    log.info("fetching configuration from " + job.getJobFile());
                    jobConfig.addResource(TraceFileSystem
                            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()))
                            .open(new Path(job.getJobFile())));
                    if (HdfsZooInstance.getInstance().getInstanceID()
                            .equals(jobConfig.get(LogSort.INSTANCE_ID_PROPERTY))) {
                        log.info("Killing job " + job.getID().toString());
                    }
                }
            }
        }
        FileStatus[] children = fs.listStatus(new Path(ServerConstants.getRecoveryDir()));
        if (children != null) {
            for (FileStatus child : children) {
                log.info("Deleting recovery directory " + child);
                fs.delete(child.getPath(), true);
            }
        }
    } catch (IOException e) {
        log.error("Error cleaning up old Log Sort jobs" + e);
    } catch (Exception e) {
        log.error("Unknown error cleaning up old jobs", e);
    }
}
From source file: org.apache.hcatalog.templeton.ListDelegator.java
License: Apache License
public List<String> run(String user)
        throws NotAuthorizedException, BadParam, IOException, InterruptedException {
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
    TempletonJobTracker tracker = null;
    try {
        tracker = new TempletonJobTracker(appConf);

        ArrayList<String> ids = new ArrayList<String>();

        JobStatus[] jobs = tracker.getAllJobs();

        if (jobs != null) {
            for (JobStatus job : jobs) {
                JobState state = null;
                try {
                    String id = job.getJobID().toString();
                    state = new JobState(id, Main.getAppConfigInstance());
                    if (user.equals(state.getUser()))
                        ids.add(id);
                } finally {
                    if (state != null) {
                        state.close();
                    }
                }
            }
        }

        return ids;
    } catch (IllegalStateException e) {
        throw new BadParam(e.getMessage());
    } finally {
        if (tracker != null)
            tracker.close();
    }
}
From source file: org.apache.hive.hcatalog.templeton.ListDelegator.java
License: Apache License
public List<String> run(String user, boolean showall)
        throws NotAuthorizedException, BadParam, IOException, InterruptedException {
    UserGroupInformation ugi = UgiFactory.getUgi(user);
    WebHCatJTShim tracker = null;
    try {
        tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi);

        ArrayList<String> ids = new ArrayList<String>();

        JobStatus[] jobs = tracker.getAllJobs();

        if (jobs != null) {
            for (JobStatus job : jobs) {
                String id = job.getJobID().toString();
                if (showall || user.equals(job.getUsername()))
                    ids.add(id);
            }
        }

        return ids;
    } catch (IllegalStateException e) {
        throw new BadParam(e.getMessage());
    } finally {
        if (tracker != null)
            tracker.close();
    }
}