List of usage examples for org.apache.hadoop.mapred.TaskCompletionEvent.getTaskStatus()
public Status getTaskStatus()
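getTaskStatus() returns a TaskCompletionEvent.Status enum value: SUCCEEDED, FAILED, KILLED, OBSOLETE, or TIPFAILED. As a minimal sketch of the pattern the examples below keep repeating — assuming `job` is a RunningJob handle for a job that has already completed — the completion events can be fetched in pages and each attempt's status inspected:

    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TaskCompletionEvent;

    // Minimal sketch (not taken from any source file below): page through the
    // completion events of an already-completed RunningJob `job` and report
    // every attempt that did not succeed.
    int startFrom = 0;
    while (true) {
        TaskCompletionEvent[] events = job.getTaskCompletionEvents(startFrom);
        if (events.length == 0) {
            break; // a completed job eventually returns an empty page
        }
        for (TaskCompletionEvent event : events) {
            switch (event.getTaskStatus()) {
            case SUCCEEDED:
                break;
            case FAILED:
            case KILLED:
            case OBSOLETE:
            case TIPFAILED:
                System.err.println("Attempt " + event.getTaskAttemptId()
                        + " ended with status " + event.getTaskStatus());
                break;
            }
        }
        startFrom += events.length;
    }

The page-based loop matters for live jobs: new events keep arriving, so callers such as MapReduceEncoder and FileUtils below remember the running offset between polls.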
From source file: boa.io.BoaOutputCommitter.java
License: Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
From source file: cascading.flow.hadoop.HadoopStepStats.java
License: Open Source License
private void addTaskStats(TaskCompletionEvent[] events) {
    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED)
            getTaskStats().add(new HadoopTaskStats(event));
    }
}
From source file: com.ibm.jaql.lang.expr.hadoop.Util.java
License: Apache License
public static void logAllTaskSyslogs(RunningJob rj, boolean onlySuccessful) throws Exception {
    String fetch = System.getProperty(FETCH_SYSLOG_PROP, "false");
    if (fetch.equals("false"))
        return;

    TaskCompletionEvent[] events = rj.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : events) {
        // Note: both branches below are identical in the original source, so the
        // onlySuccessful flag has no effect here -- every event's syslog is
        // printed into the main log regardless of its status.
        if (onlySuccessful && (event.getTaskStatus() == TaskCompletionEvent.Status.SUCCEEDED)) {
            // print the syslog into the main log
            STATUS_LOG.info(event.toString());
            logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        } else {
            STATUS_LOG.info(event.toString());
            logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        }
    }
}
From source file: com.liveramp.cascading_ext.flow.LoggingFlow.java
License: Apache License
private String logJobErrors() {
    boolean exceptions = false;
    StringBuilder jobErrors = new StringBuilder();
    final String divider = StringUtils.repeat("-", 80);
    logAndAppend(jobErrors, divider);
    try {
        List<FlowStepStats> stepStats = getFlowStats().getFlowStepStats();
        Set<String> jobFailures = new HashSet<String>();
        for (FlowStepStats stat : stepStats) {
            try {
                RunningJob job = ((HadoopStepStats) stat).getRunningJob();
                TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
                ArrayList<TaskCompletionEvent> failures = new ArrayList<TaskCompletionEvent>();
                for (TaskCompletionEvent event : events) {
                    if (event.getTaskStatus() == Status.FAILED) {
                        failures.add(event);
                    }
                }
                // We limit the number of potential logs being pulled to spare the jobtracker.
                // The loop is bounded by failures.size() as well, since the original
                // unguarded loop could run past the end of the list.
                if (failures.size() > 0) {
                    Collections.shuffle(failures);
                    for (int i = 0; i < Math.min(FAILURES_TO_QUERY, failures.size()); i++) {
                        jobFailures.add(getFailureLog(failures.get(i)));
                    }
                }
            } catch (Exception e) {
                exceptions = true;
            }
        }
        if (exceptions) {
            logAndAppend(jobErrors, "unable to retrieve failures from all completed steps!");
            logAndAppend(jobErrors, "successfully retrieved job failures: " + StringUtils.join(jobFailures, ", "));
        } else {
            logAndAppend(jobErrors, "step attempt failures: " + StringUtils.join(jobFailures, ", "));
        }
    } catch (Exception e) {
        logAndAppend(jobErrors, "unable to retrieve any failures from steps");
        logAndAppend(jobErrors, e.toString());
    }
    logAndAppend(jobErrors, divider);
    return jobErrors.toString();
}
From source file: datafu.hourglass.jobs.StagedOutputJob.java
License: Apache License
/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId =
            new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(), jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java
License: Apache License
private Pair<ArrayList<TaskLog>, ArrayList<TaskLog>> getLogsFromCompletionEvents(TaskCompletionEvent[] events) {
    ArrayList<TaskLog> mapFailures = new ArrayList<TaskLog>();
    ArrayList<TaskLog> reduceFailures = new ArrayList<TaskLog>();

    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            TaskLog log = new TaskLog(event.getTaskTrackerHttp(), event.getTaskAttemptId());
            if (event.isMapTask())
                mapFailures.add(log);
            else
                reduceFailures.add(log);
        }
    }

    return new Pair<ArrayList<TaskLog>, ArrayList<TaskLog>>(mapFailures, reduceFailures);
}
From source file: io.hops.erasure_coding.MapReduceEncoder.java
License: Apache License
/**
 * Checks if the map-reduce job has completed.
 *
 * @return true if the job completed, false otherwise.
 * @throws java.io.IOException
 */
public boolean checkComplete() throws IOException {
    JobID jobID = runningJob.getID();
    if (runningJob.isComplete()) {
        // delete job directory
        final String jobdir = jobconf.get(JOB_DIR_LABEL);
        if (jobdir != null) {
            final Path jobpath = new Path(jobdir);
            jobpath.getFileSystem(jobconf).delete(jobpath, true);
        }
        if (runningJob.isSuccessful()) {
            LOG.info("Job Complete(Succeeded): " + jobID);
        } else {
            LOG.info("Job Complete(Failed): " + jobID);
        }
        cleanUp();
        return true;
    } else {
        String report = (" job " + jobID + " map "
                + StringUtils.formatPercent(runningJob.mapProgress(), 0) + " reduce "
                + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
        if (!report.equals(lastReport)) {
            LOG.info(report);
            lastReport = report;
        }
        TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(jobEventCounter);
        jobEventCounter += events.length;
        for (TaskCompletionEvent event : events) {
            if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) {
                LOG.info(" Job " + jobID + " " + event.toString());
            }
        }
        return false;
    }
}
From source file: org.estado.core.JobStatusChecker.java
License: Apache License
private List<TaskStatus> getTaskDetails(RunningJob job) {
    TaskCompletionEvent[] tasks = new TaskCompletionEvent[0];
    List<TaskStatus> taskStatusList = new ArrayList<TaskStatus>();
    try {
        tasks = job.getTaskCompletionEvents(0);
        for (TaskCompletionEvent task : tasks) {
            TaskStatus taskStatus = new TaskStatus();
            taskStatus.setTaskId(task.getTaskAttemptId().toString());
            taskStatus.setStatus(task.getTaskStatus().toString());
            taskStatus.setDuration(task.getTaskRunTime() * 1L); // change to long
            taskStatus.setTaskType(task.isMapTask() ? "Map" : "Reduce");
            if (!task.getTaskStatus().equals(TaskCompletionEvent.Status.SUCCEEDED)) {
                String url = task.getTaskTrackerHttp() + "/tasklog?attemptid=" + task.getTaskAttemptId()
                        + "&all=true";
                URLConnection connection = new URL(url).openConnection();
                connection.setDoOutput(true);
                connection.connect();
                Scanner s = new java.util.Scanner(connection.getInputStream()).useDelimiter("\\A");
                String log = s.hasNext() ? s.next() : "";
                taskStatus.setLog(log);
            }
            taskStatusList.add(taskStatus);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return taskStatusList;
}
From source file: org.godhuli.rhipe.FileUtils.java
License: Apache License
public REXP getstatus(String jd, boolean geterrors) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    // cfg.addResource(new Path(jobfile));
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    double dura = ((double) System.currentTimeMillis() - startsec) / 1000;
    REXP ro = FileUtils.buildlistFromOldCounter(cc, dura);
    int jobs = rj.getJobState();
    String jobss = null;
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();
    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);
    int totalmaps = maptr.length, totalreds = redtr.length;
    int mappending = 0, redpending = 0, maprunning = 0, redrunning = 0, redfailed = 0, redkilled = 0,
            mapkilled = 0, mapfailed = 0, mapcomp = 0, redcomp = 0;
    for (int i = 0; i < maptr.length; i++) {
        TIPStatus t = maptr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            mapcomp++;
            break;
        case FAILED:
            mapfailed++;
            break;
        case PENDING:
            mappending++;
            break;
        case RUNNING:
            maprunning++;
            break;
        case KILLED:
            mapkilled++;
            break;
        }
    }
    for (int i = 0; i < redtr.length; i++) {
        TIPStatus t = redtr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            redcomp++;
            break;
        case FAILED:
            redfailed++;
            break;
        case PENDING:
            redpending++;
            break;
        case RUNNING:
            redrunning++;
            break;
        case KILLED:
            redkilled++;
            break;
        }
    }
    int reduceafails = 0, reduceakilled = 0, mapafails = 0, mapakilled = 0;
    int startfrom = 0;

    REXP.Builder errcontainer = REXP.newBuilder();
    errcontainer.setRclass(REXP.RClass.STRING);
    while (true) {
        org.apache.hadoop.mapred.TaskCompletionEvent[] events = rj.getTaskCompletionEvents(startfrom);
        for (int i = 0; i < events.length; i++) {
            org.apache.hadoop.mapred.TaskCompletionEvent e = events[i];
            switch (e.getTaskStatus()) {
            case KILLED:
                if (e.isMapTask()) {
                    mapakilled++;
                } else {
                    reduceakilled++;
                }
                break;
            case TIPFAILED:
            case FAILED:
                if (e.isMapTask()) {
                    mapafails++;
                } else {
                    reduceafails++;
                }
                if (geterrors) {
                    REXPProtos.STRING.Builder content = REXPProtos.STRING.newBuilder();
                    String[] s = rj.getTaskDiagnostics(e.getTaskAttemptId());
                    if (s != null && s.length > 0) {
                        content.setStrval(s[0]);
                        errcontainer.addStringValue(content.build());
                    }
                }
                break;
            }
        }
        startfrom += events.length;
        if (events.length == 0)
            break;
    }

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { dura }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalmaps, mappending, maprunning, mapcomp, mapkilled, mapafails, mapakilled }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalreds, redpending, redrunning, redcomp, redkilled, reduceafails, reduceakilled }));
    thevals.addRexpValue(ro);
    thevals.addRexpValue(errcontainer);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));
    return (thevals.build());
}
From source file: org.smartfrog.services.hadoop.mapreduce.submitter.SubmitterImpl.java
License: Open Source License
/**
 * Handle the end of the job
 *
 * @throws IOException on any failure
 */
private void processEndOfJob() throws IOException {
    boolean succeeded = runningJob.isSuccessful();
    int taskCount = 0;
    int failures = 0;
    String message = "Job " + runningJob.getJobName() + " ID=" + runningJob.getID().toString() + " has "
            + (succeeded ? " succeeded" : "failed");
    StringBuilder builder = new StringBuilder();
    TaskCompletionEvent[] history = runningJob.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : history) {
        taskCount++;
        builder.append(event.isMapTask() ? "\nMap: " : "\nReduce: ");
        builder.append(event.toString());
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            failures++;
            String[] diagnostics = runningJob.getTaskDiagnostics(event.getTaskAttemptId());
            for (String line : diagnostics) {
                builder.append("\n ");
                builder.append(line);
            }
        }
        builder.append("\n Tasks run :").append(taskCount).append(" failed: ").append(failures);
        if (!succeeded && dumpOnFailure) {
            builder.append("Job configuration used");
            builder.append(jobConf.dump());
        }
        message = message + builder.toString();
    }
    sfLog().info(message);
    if (terminateWhenJobFinishes) {
        TerminationRecord record = succeeded ? TerminationRecord.normal(message, sfCompleteNameSafe())
                : TerminationRecord.abnormal(message, sfCompleteNameSafe());
        new ComponentHelper(this).targetForTermination(record, false, false);
    }
}