List of usage examples for org.apache.hadoop.mapred.TaskCompletionEvent.getTaskAttemptId()
public TaskAttemptID getTaskAttemptId()
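Before the examples from real projects, here is a minimal, self-contained sketch of the accessor in use (illustrative only, not taken from any of the source files below; the job id string is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskCompletionEvent;

public class TaskAttemptIdExample {
    public static void main(String[] args) throws Exception {
        // Placeholder: pass a real id such as "job_201301010000_0001".
        String jobIdString = args[0];
        JobClient jobClient = new JobClient(new JobConf(new Configuration()));
        RunningJob job = jobClient.getJob(JobID.forName(jobIdString));
        if (job == null) {
            System.err.println("Job not found: " + jobIdString);
            return;
        }
        // Fetch the first chunk of completion events and print each attempt's id and status.
        for (TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
            TaskAttemptID attemptId = event.getTaskAttemptId();
            System.out.println(attemptId + " -> " + event.getTaskStatus());
        }
    }
}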
From source file: boa.io.BoaOutputCommitter.java
License: Apache License

@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    // Collect diagnostics from every attempt that did not succeed.
    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
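A caveat that applies to this and several examples below: RunningJob.getTaskCompletionEvents(int) returns events in bounded chunks (the classic JobClient implementation fetches a small window per call), so a single call starting at 0 may not cover every attempt of a large job. A hedged sketch of a draining loop, assuming a RunningJob obtained as above:

// Sketch: page through every completion event by advancing the start index until
// an empty chunk comes back, instead of relying on one getTaskCompletionEvents(0) call.
static void forEachCompletionEvent(RunningJob job) throws java.io.IOException {
    int start = 0;
    TaskCompletionEvent[] chunk;
    while ((chunk = job.getTaskCompletionEvents(start)).length > 0) {
        for (TaskCompletionEvent event : chunk) {
            // inspect event.getTaskAttemptId(), event.getTaskStatus(), ...
        }
        start += chunk.length;
    }
}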
From source file: com.ibm.jaql.lang.expr.hadoop.Util.java
License: Apache License

public static void logAllTaskSyslogs(RunningJob rj, boolean onlySuccessful) throws Exception {
    String fetch = System.getProperty(FETCH_SYSLOG_PROP, "false");
    if (fetch.equals("false"))
        return;

    TaskCompletionEvent[] events = rj.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : events) {
        // When onlySuccessful is set, skip attempts that did not succeed.
        if (onlySuccessful && event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED)
            continue;
        // Print the syslog into the main log.
        STATUS_LOG.info(event.toString());
        logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
    }
}
From source file: com.liveramp.cascading_ext.flow.LoggingFlow.java
License: Apache License

private static String getFailureLog(TaskCompletionEvent event) {
    LOG.info("Getting errors for attempt " + event.getTaskAttemptId());
    String exception = "";
    try {
        String fullLog = retrieveTaskLogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        exception = extractErrorFromLogString(fullLog);
    } catch (IOException e) {
        LOG.info("Regex Error!", e);
    }
    return "\nCluster Log Exception:\n" + exception;
}
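The helper retrieveTaskLogs is not shown in this snippet. A plausible implementation (purely illustrative, not LoggingFlow's actual code) would fetch the classic TaskTracker tasklog servlet, the same endpoint the JobStatusChecker example at the end of this page uses:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.mapred.TaskAttemptID;

// Hypothetical sketch of the unshown helper; the tasklog servlet URL follows the
// classic (pre-YARN) TaskTracker convention also used in the JobStatusChecker example below.
private static String retrieveTaskLogs(TaskAttemptID taskAttemptId, String taskTrackerHttp) throws IOException {
    String urlString = taskTrackerHttp + "/tasklog?attemptid=" + taskAttemptId + "&all=true";
    StringBuilder log = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new URL(urlString).openStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            log.append(line).append('\n');
        }
    }
    return log.toString();
}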
From source file: datafu.hourglass.jobs.StagedOutputJob.java
License: Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters to
 * @throws IOException if the counters cannot be written
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // Make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped.
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));

    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId =
            new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(), jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : setupReports) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by
    // setup/map/reduce/cleanup. Unfortunately the job client doesn't have an easier
    // way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java
License: Apache License

private Pair<ArrayList<TaskLog>, ArrayList<TaskLog>> getLogsFromCompletionEvents(TaskCompletionEvent[] events) {
    ArrayList<TaskLog> mapFailures = new ArrayList<TaskLog>();
    ArrayList<TaskLog> reduceFailures = new ArrayList<TaskLog>();

    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            TaskLog log = new TaskLog(event.getTaskTrackerHttp(), event.getTaskAttemptId());
            if (event.isMapTask())
                mapFailures.add(log);
            else
                reduceFailures.add(log);
        }
    }

    return new Pair<ArrayList<TaskLog>, ArrayList<TaskLog>>(mapFailures, reduceFailures);
}
From source file: org.apache.falcon.logging.TaskLogRetrieverYarn.java
License: Apache License

@Override
public List<String> retrieveTaskLogURL(String jobIdStr) throws IOException {
    List<String> taskLogUrls = new ArrayList<String>();
    Configuration conf = getConf();
    Cluster cluster = getCluster(conf);
    JobID jobID = JobID.forName(jobIdStr);
    if (jobID == null) {
        LOG.warn("External id for workflow action is null");
        return null;
    }

    if (conf.get(YARN_LOG_SERVER_URL) == null) {
        LOG.warn("YARN log server URL is not configured");
        return null;
    }

    try {
        Job job = cluster.getJob(jobID);
        if (job != null) {
            TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
            for (TaskCompletionEvent event : events) {
                LogParams params = cluster.getLogParams(jobID, event.getTaskAttemptId());
                // Prepend the scheme to the configured log server URL if it lacks one,
                // then build the per-container log URL.
                String url = (conf.get(YARN_LOG_SERVER_URL).startsWith(SCHEME)
                        ? conf.get(YARN_LOG_SERVER_URL)
                        : SCHEME + conf.get(YARN_LOG_SERVER_URL))
                        + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId()
                        + "/" + params.getApplicationId()
                        + "/" + params.getOwner() + "?start=0";
                LOG.info("Task Log URL for the job {} is {}", jobIdStr, url);
                taskLogUrls.add(url);
            }
            return taskLogUrls;
        }
        LOG.warn("Unable to find the job in cluster: {}", jobIdStr);
        return null;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
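For illustration only: if yarn.log.server.url were configured as http://historyserver:19888/jobhistory/logs (a made-up value), the loop above would build URLs of the form http://historyserver:19888/jobhistory/logs/<task-tracker-http>/<container-id>/<application-id>/<owner>?start=0, which is intended to resolve to the aggregated container logs. The v2 variant below differs only in that it always prepends the scheme rather than checking for it first, and it does not guard against a missing yarn.log.server.url.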
From source file: org.apache.falcon.logging.v2.TaskLogRetrieverYarn.java
License: Apache License

@Override
public List<String> retrieveTaskLogURL(String jobIdStr) throws IOException {
    List<String> taskLogUrls = new ArrayList<String>();
    Configuration conf = getConf();
    Cluster cluster = getCluster(conf);
    JobID jobID = JobID.forName(jobIdStr);
    if (jobID == null) {
        LOG.warn("External id for workflow action is null");
        return null;
    }

    try {
        Job job = cluster.getJob(jobID);
        if (job != null) {
            TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
            for (TaskCompletionEvent event : events) {
                LogParams params = cluster.getLogParams(jobID, event.getTaskAttemptId());
                String url = SCHEME + conf.get(YARN_LOG_SERVER_URL)
                        + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId()
                        + "/" + params.getApplicationId()
                        + "/" + params.getOwner() + "?start=0";
                LOG.info("Task Log URL for the job {} is {}", jobIdStr, url);
                taskLogUrls.add(url);
            }
            return taskLogUrls;
        }
        LOG.warn("Unable to find the job in cluster: {}", jobIdStr);
        return null;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file: org.apache.falcon.logging.v2.TaskLogRetrieverYarnTest.java
License: Apache License

@DataProvider(name = "testData")
public Object[][] testData() throws IOException, InterruptedException {
    int samples = getRandomValueInRange(10) + 1;
    Object[][] resultSet = new Object[samples][2];
    for (int count = 0; count < samples; count++) {
        List<String> expectedResult = new ArrayList<String>();
        Cluster cluster = getCluster(getConf());
        String jobId = new JobID("job", RANDOM.nextInt(1000)).toString();
        boolean success = RANDOM.nextBoolean();
        JobID jobID = JobID.forName(jobId);
        int numEvents = getRandomValueInRange(10) + 1;
        TaskCompletionEvent[] events = getTaskCompletionEvents(numEvents, jobID);
        Job job = mock(Job.class);
        when(cluster.getJob(jobID)).thenReturn(job);
        when(job.getTaskCompletionEvents(0)).thenReturn(events);
        for (TaskCompletionEvent event : events) {
            if (success) {
                LogParams params = getLogParams();
                when(cluster.getLogParams(jobID, event.getTaskAttemptId())).thenReturn(params);
                String url = SCHEME + getConf().get(YARN_LOG_SERVER_URL)
                        + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId()
                        + "/" + params.getApplicationId()
                        + "/" + params.getOwner() + "?start=0";
                expectedResult.add(url);
            } else {
                when(cluster.getJob(jobID)).thenReturn(null);
                expectedResult = null;
            }
            resultSet[count] = new Object[] { jobId, expectedResult };
        }
    }
    return resultSet;
}
From source file: org.apache.falcon.oozie.logging.TaskLogRetrieverYarnTest.java
License: Apache License

@DataProvider(name = "testData")
public Object[][] testData() throws IOException, InterruptedException {
    int samples = getRandomValueInRange(10) + 1;
    Object[][] resultSet = new Object[samples][2];
    for (int count = 0; count < samples; count++) {
        List<String> expectedResult = new ArrayList<String>();
        Cluster cluster = getCluster(getConf());
        String jobId = new JobID("job", count).toString();
        boolean success = random.nextBoolean();
        JobID jobID = JobID.forName(jobId);
        int numEvents = getRandomValueInRange(10) + 1;
        TaskCompletionEvent[] events = getTaskCompletionEvents(numEvents, jobID);
        Job job = mock(Job.class);
        when(cluster.getJob(jobID)).thenReturn(job);
        when(job.getTaskCompletionEvents(0)).thenReturn(events);
        for (TaskCompletionEvent event : events) {
            if (success) {
                LogParams params = getLogParams();
                when(cluster.getLogParams(jobID, event.getTaskAttemptId())).thenReturn(params);
                String url = SCHEME + getConf().get(YARN_LOG_SERVER_URL)
                        + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId()
                        + "/" + params.getApplicationId()
                        + "/" + params.getOwner() + "?start=0";
                expectedResult.add(url);
            } else {
                when(cluster.getJob(jobID)).thenReturn(null);
                expectedResult = null;
                break;
            }
        }
        resultSet[count] = new Object[] { jobId, expectedResult };
    }
    return resultSet;
}
From source file: org.estado.core.JobStatusChecker.java
License: Apache License

private List<TaskStatus> getTaskDetails(RunningJob job) {
    TaskCompletionEvent[] tasks = new TaskCompletionEvent[0];
    List<TaskStatus> taskStatusList = new ArrayList<TaskStatus>();
    try {
        tasks = job.getTaskCompletionEvents(0);
        for (TaskCompletionEvent task : tasks) {
            TaskStatus taskStatus = new TaskStatus();
            taskStatus.setTaskId(task.getTaskAttemptId().toString());
            taskStatus.setStatus(task.getTaskStatus().toString());
            taskStatus.setDuration(task.getTaskRunTime() * 1L); // widen the int runtime to long
            taskStatus.setTaskType(task.isMapTask() ? "Map" : "Reduce");
            if (!task.getTaskStatus().equals(TaskCompletionEvent.Status.SUCCEEDED)) {
                // Fetch the full task log from the classic TaskTracker tasklog servlet.
                String url = task.getTaskTrackerHttp() + "/tasklog?attemptid=" + task.getTaskAttemptId()
                        + "&all=true";
                URLConnection connection = new URL(url).openConnection();
                connection.setDoOutput(true);
                connection.connect();
                // The "\\A" delimiter slurps the whole stream as one token; closing the
                // Scanner via try-with-resources avoids leaking the connection's stream.
                try (java.util.Scanner s = new java.util.Scanner(connection.getInputStream())
                        .useDelimiter("\\A")) {
                    String log = s.hasNext() ? s.next() : "";
                    taskStatus.setLog(log);
                }
            }
            taskStatusList.add(taskStatus);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return taskStatusList;
}