List of usage examples for org.apache.hadoop.mapred.RunningJob#getTaskCompletionEvents
public TaskCompletionEvent[] getTaskCompletionEvents(int startFrom) throws IOException;
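Before the per-project examples, here is a minimal, self-contained sketch of the common idiom: getTaskCompletionEvents(startFrom) returns events in batches, so callers page through them until an empty array comes back (for a completed job, this yields every event). The TaskEventPoller class and printAllEvents method names are illustrative only; the sketch assumes a JobClient already connected to the cluster.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;

public class TaskEventPoller {
    // Pages through all task completion events for the given job ID.
    public static void printAllEvents(JobClient client, String jobId) throws IOException {
        RunningJob job = client.getJob(JobID.forName(jobId));
        if (job == null) {
            throw new IOException("No such job: " + jobId);
        }
        int startFrom = 0;
        while (true) {
            // Each call returns the next batch of events starting at startFrom;
            // an empty array means no further events are available yet.
            TaskCompletionEvent[] events = job.getTaskCompletionEvents(startFrom);
            if (events.length == 0) {
                break;
            }
            for (TaskCompletionEvent event : events) {
                System.out.println(event.getTaskAttemptId() + " -> " + event.getTaskStatus());
            }
            startFrom += events.length;
        }
    }
}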
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            // Collect diagnostics for every unsuccessful task attempt.
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    }
    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
From source file:com.ibm.jaql.lang.expr.hadoop.Util.java
License:Apache License
public static void logAllTaskSyslogs(RunningJob rj, boolean onlySuccessful) throws Exception {
    String fetch = System.getProperty(FETCH_SYSLOG_PROP, "false");
    if (fetch.equals("false"))
        return;

    TaskCompletionEvent[] events = rj.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : events) {
        // When onlySuccessful is set, skip tasks that did not succeed.
        if (onlySuccessful && event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED)
            continue;
        // Print the event into the main log, then pull the task's syslog.
        STATUS_LOG.info(event.toString());
        logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
    }
}
From source file:com.liveramp.cascading_ext.flow.LoggingFlow.java
License:Apache License
private String logJobErrors() {
    boolean exceptions = false;
    StringBuilder jobErrors = new StringBuilder();
    final String divider = StringUtils.repeat("-", 80);
    logAndAppend(jobErrors, divider);
    try {
        List<FlowStepStats> stepStats = getFlowStats().getFlowStepStats();
        Set<String> jobFailures = new HashSet<String>();
        for (FlowStepStats stat : stepStats) {
            try {
                RunningJob job = ((HadoopStepStats) stat).getRunningJob();
                TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
                ArrayList<TaskCompletionEvent> failures = new ArrayList<TaskCompletionEvent>();
                for (TaskCompletionEvent event : events) {
                    if (event.getTaskStatus() == Status.FAILED) {
                        failures.add(event);
                    }
                }
                // We limit the number of potential logs being pulled to spare the jobtracker.
                if (failures.size() > 0) {
                    Collections.shuffle(failures);
                    for (int i = 0; i < Math.min(FAILURES_TO_QUERY, failures.size()); i++) {
                        jobFailures.add(getFailureLog(failures.get(i)));
                    }
                }
            } catch (Exception e) {
                exceptions = true;
            }
        }
        if (exceptions) {
            logAndAppend(jobErrors, "unable to retrieve failures from all completed steps!");
            logAndAppend(jobErrors, "successfully retrieved job failures: " + StringUtils.join(jobFailures, ", "));
        } else {
            logAndAppend(jobErrors, "step attempt failures: " + StringUtils.join(jobFailures, ", "));
        }
    } catch (Exception e) {
        logAndAppend(jobErrors, "unable to retrieve any failures from steps");
        logAndAppend(jobErrors, e.toString());
    }
    logAndAppend(jobErrors, divider);
    return jobErrors.toString();
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public JobStatus getStatus(Submission submission) throws NotFoundException, InternalException {
    RunningJob job = getJob(submission);
    JobConf conf = loadJobConfiguration(job);

    JobStatus status = new JobStatus();
    status.setInfo(getInfo(submission, job, conf));

    try {
        JobClient client = new JobClient(new JobConf());

        // Map the numeric job state onto the service's State enum. The old
        // mapred JobStatus states are plain int constants, so a chain of
        // ifs is used here instead of a switch.
        int jobState = job.getJobState();
        if (jobState == org.apache.hadoop.mapred.JobStatus.FAILED)
            status.setState(State.FAILED);
        else if (jobState == org.apache.hadoop.mapred.JobStatus.SUCCEEDED)
            status.setState(State.SUCCESSFUL);
        else if (jobState == org.apache.hadoop.mapred.JobStatus.KILLED)
            status.setState(State.KILLED);
        else if (jobState == org.apache.hadoop.mapred.JobStatus.RUNNING)
            status.setState(State.RUNNING);
        else
            status.setState(State.PREP);

        // Get task counts
        TaskReport[] mapTaskReports = client.getMapTaskReports(job.getID());
        TaskReport[] reduceTaskReports = client.getReduceTaskReports(job.getID());

        // Get failed task logs
        TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
        Pair<ArrayList<TaskLog>, ArrayList<TaskLog>> failures;
        if (events != null)
            failures = getLogsFromCompletionEvents(events);
        else
            failures = getLogsFromHistory(job, new Configuration());
        ArrayList<TaskLog> mapFailures = failures.first;
        ArrayList<TaskLog> reduceFailures = failures.second;

        // Get other mapper info
        PhaseStatus mapStatus = new PhaseStatus();
        mapStatus.setProgress(job.mapProgress() * 100);
        if (!mapFailures.isEmpty())
            mapStatus.setErrors(getMeaningfulTaskLog(mapFailures));
        if (mapTaskReports != null)
            mapStatus.setTotalTasks(mapTaskReports.length);
        // TODO: Handle the state in a sane way
        mapStatus.setState(status.getState());
        status.setMapStatus(mapStatus);

        // Get other reducer info
        PhaseStatus reduceStatus = new PhaseStatus();
        reduceStatus.setProgress(job.reduceProgress() * 100);
        if (!reduceFailures.isEmpty())
            reduceStatus.setErrors(getMeaningfulTaskLog(reduceFailures));
        reduceStatus.setState(status.getState());
        if (reduceTaskReports != null)
            reduceStatus.setTotalTasks(reduceTaskReports.length);
        if (conf != null)
            reduceStatus.setOutputPath(FileOutputFormat.getOutputPath(conf).toString());
        status.setReduceStatus(reduceStatus);
    } catch (Exception ex) {
        throw JobServiceHandler.wrapException("Could not get job info.", ex);
    }

    return status;
}
From source file:org.apache.falcon.logging.DefaultTaskLogRetriever.java
License:Apache License
@Override
public List<String> retrieveTaskLogURL(String jobId) throws IOException {
    JobConf jobConf = new JobConf(getConf());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob job = jobClient.getJob(JobID.forName(jobId));
    if (job == null) {
        LOG.warn("No running job for job id: {}", jobId);
        return getFromHistory(jobId);
    }

    List<String> taskLogUrls = new ArrayList<String>();
    TaskCompletionEvent[] tasks = job.getTaskCompletionEvents(0);
    // The 0th event is setup, the 1st is the launcher, the 2nd is cleanup.
    if (tasks != null && tasks.length == 3 && tasks[1] != null) {
        taskLogUrls.add(tasks[1].getTaskTrackerHttp() + "/tasklog?attemptid=" + tasks[1].getTaskAttemptId()
                + "&all=true");
        return taskLogUrls;
    } else {
        LOG.warn("No running task for job: {}", jobId);
        return getFromHistory(jobId);
    }
}
From source file:org.apache.ivory.logging.LogMover.java
License:Apache License
private String getTTlogURL(String jobId) throws Exception {
    JobConf jobConf = new JobConf(getConf());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob job = jobClient.getJob(JobID.forName(jobId));
    if (job == null) {
        LOG.warn("No running job for job id: " + jobId);
        return null;
    }

    TaskCompletionEvent[] tasks = job.getTaskCompletionEvents(0);
    // The 0th event is setup, the 1st is the launcher, the 2nd is cleanup.
    if (tasks != null && tasks.length == 3 && tasks[1] != null) {
        return tasks[1].getTaskTrackerHttp() + "/tasklog?attemptid=" + tasks[1].getTaskAttemptId()
                + "&all=true";
    } else {
        LOG.warn("No running task for job: " + jobId);
    }
    return null;
}
From source file:org.estado.core.JobStatusChecker.java
License:Apache License
private List<TaskStatus> getTaskDetails(RunningJob job) {
    TaskCompletionEvent[] tasks = new TaskCompletionEvent[0];
    List<TaskStatus> taskStatusList = new ArrayList<TaskStatus>();
    try {
        tasks = job.getTaskCompletionEvents(0);
        for (TaskCompletionEvent task : tasks) {
            TaskStatus taskStatus = new TaskStatus();
            taskStatus.setTaskId(task.getTaskAttemptId().toString());
            taskStatus.setStatus(task.getTaskStatus().toString());
            taskStatus.setDuration(task.getTaskRunTime() * 1L); // widen to long
            taskStatus.setTaskType(task.isMapTask() ? "Map" : "Reduce");
            if (!task.getTaskStatus().equals(TaskCompletionEvent.Status.SUCCEEDED)) {
                // Fetch the full task log from the task tracker's HTTP endpoint.
                String url = task.getTaskTrackerHttp() + "/tasklog?attemptid=" + task.getTaskAttemptId()
                        + "&all=true";
                URLConnection connection = new URL(url).openConnection();
                connection.setDoOutput(true);
                connection.connect();
                Scanner s = new java.util.Scanner(connection.getInputStream()).useDelimiter("\\A");
                String log = s.hasNext() ? s.next() : "";
                taskStatus.setLog(log);
            }
            taskStatusList.add(taskStatus);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return taskStatusList;
}
From source file:org.godhuli.rhipe.FileUtils.java
License:Apache License
public REXP getstatus(String jd, boolean geterrors) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI)");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    // cfg.addResource(new Path(jobfile));
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    double dura = ((double) System.currentTimeMillis() - startsec) / 1000;
    REXP ro = FileUtils.buildlistFromOldCounter(cc, dura);
    int jobs = rj.getJobState();
    String jobss = null;
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();
    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);
    int totalmaps = maptr.length, totalreds = redtr.length;
    int mappending = 0, redpending = 0, maprunning = 0, redrunning = 0, redfailed = 0, redkilled = 0,
            mapkilled = 0, mapfailed = 0, mapcomp = 0, redcomp = 0;
    // Tally current task states from the map and reduce task reports.
    for (int i = 0; i < maptr.length; i++) {
        TIPStatus t = maptr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            mapcomp++;
            break;
        case FAILED:
            mapfailed++;
            break;
        case PENDING:
            mappending++;
            break;
        case RUNNING:
            maprunning++;
            break;
        case KILLED:
            mapkilled++;
            break;
        }
    }
    for (int i = 0; i < redtr.length; i++) {
        TIPStatus t = redtr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            redcomp++;
            break;
        case FAILED:
            redfailed++;
            break;
        case PENDING:
            redpending++;
            break;
        case RUNNING:
            redrunning++;
            break;
        case KILLED:
            redkilled++;
            break;
        }
    }
    int reduceafails = 0, reduceakilled = 0, mapafails = 0, mapakilled = 0;
    int startfrom = 0;
    REXP.Builder errcontainer = REXP.newBuilder();
    errcontainer.setRclass(REXP.RClass.STRING);
    // Page through all task completion events in batches, counting failed
    // and killed attempts and (optionally) collecting their diagnostics.
    while (true) {
        org.apache.hadoop.mapred.TaskCompletionEvent[] events = rj.getTaskCompletionEvents(startfrom);
        for (int i = 0; i < events.length; i++) {
            org.apache.hadoop.mapred.TaskCompletionEvent e = events[i];
            switch (e.getTaskStatus()) {
            case KILLED:
                if (e.isMapTask()) {
                    mapakilled++;
                } else {
                    reduceakilled++;
                }
                break;
            case TIPFAILED:
            case FAILED:
                if (e.isMapTask()) {
                    mapafails++;
                } else {
                    reduceafails++;
                }
                if (geterrors) {
                    REXPProtos.STRING.Builder content = REXPProtos.STRING.newBuilder();
                    String[] s = rj.getTaskDiagnostics(e.getTaskAttemptId());
                    if (s != null && s.length > 0) {
                        content.setStrval(s[0]);
                        errcontainer.addStringValue(content.build());
                    }
                }
                break;
            }
        }
        startfrom += events.length;
        if (events.length == 0)
            break;
    }
    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { dura }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalmaps, mappending, maprunning, mapcomp, mapkilled, mapafails, mapakilled }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalreds, redpending, redrunning, redcomp, redkilled, reduceafails, reduceakilled }));
    thevals.addRexpValue(ro);
    thevals.addRexpValue(errcontainer);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));
    return thevals.build();
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
/**
 * Log messages indicating completion (success/failure) of component tasks for the provided running job.
 *
 * @param runningJob Running job to poll for completion events
 * @param startIndex Start at this event index to poll from
 * @return Total events consumed
 * @throws IOException Error fetching events
 */
private int logTaskMessages(RunningJob runningJob, int startIndex) throws IOException {
    TaskCompletionEvent[] tcEvents = runningJob.getTaskCompletionEvents(startIndex);
    for (int i = 0; i < tcEvents.length; i++) {
        String[] diags = runningJob.getTaskDiagnostics(tcEvents[i].getTaskAttemptId());
        StringBuilder diagsOutput = new StringBuilder();
        if (diags != null && diags.length > 0) {
            diagsOutput.append(Const.CR);
            for (String s : diags) {
                diagsOutput.append(s);
                diagsOutput.append(Const.CR);
            }
        }
        switch (tcEvents[i].getTaskStatus()) {
        case KILLED:
            logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                    TaskCompletionEvent.Status.KILLED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                    tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            break;
        case FAILED:
            logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                    TaskCompletionEvent.Status.FAILED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                    tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            break;
        case SUCCEEDED:
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                    TaskCompletionEvent.Status.SUCCEEDED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                    tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            break;
        }
    }
    return tcEvents.length;
}
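Because logTaskMessages returns the number of events consumed, a caller can log events incrementally while the job runs. The loop below is an illustrative sketch only, not taken from the Pentaho source; pollTaskMessages and the five-second sleep interval are assumptions.

// Hypothetical caller: poll a running job and log each batch of task
// completion events exactly once, advancing by the count returned.
private void pollTaskMessages(RunningJob runningJob) throws IOException, InterruptedException {
    int eventIndex = 0;
    while (!runningJob.isComplete()) {
        eventIndex += logTaskMessages(runningJob, eventIndex);
        Thread.sleep(5000); // assumed poll interval
    }
    logTaskMessages(runningJob, eventIndex); // drain any remaining events
}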