List of usage examples for org.apache.hadoop.mapreduce.TaskReport#getRunningTaskAttemptIds
public Collection<TaskAttemptID> getRunningTaskAttemptIds()
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceMetricsWriter.java
License:Apache License
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException { JobStatus jobStatus = jobConf.getStatus(); // map stats/*from w w w.ja v a2 s. co m*/ float mapProgress = jobStatus.getMapProgress(); int runningMappers = 0; int runningReducers = 0; for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) { reportMapTaskMetrics(tr); runningMappers += tr.getRunningTaskAttemptIds().size(); } for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) { reportReduceTaskMetrics(tr); runningReducers += tr.getRunningTaskAttemptIds().size(); } int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB); int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB); long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS); long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS); long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES); mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100)); mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords); mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords); mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes); mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers); mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper); LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})", (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper); // reduce stats float reduceProgress = jobStatus.getReduceProgress(); long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS); long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS); reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) 
(reduceProgress * 100)); reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords); reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords); reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers); reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer); LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})", (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer); }
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusChecker.java
License:Apache License
/** * Check the status of a task//from w ww. j a v a 2 s .c om * * @param taskReport * @param currTime * @return true if the task is well-behaved */ boolean checkTask(TaskType taskType, TaskReport taskReport, long currTime) { metrics.inspectedTasks.incr(); switch (taskType) { case MAP: metrics.inspectedMappers.incr(); break; case REDUCE: metrics.inspectedReducers.incr(); break; default: } boolean okTask = appConf.getTaskPolicy().checkTask(appReport, taskType, taskReport, appConf, currTime); if (!okTask) LOG.error(taskReport.getTaskId() + " identified as BAD"); else LOG.debug(taskReport.getTaskId() + " passes the check"); if (okTask) return true; //else // the task is potentially problematic, check the attempts to make sure Collection<TaskAttemptID> attemptIds = taskReport.getRunningTaskAttemptIds(); LOG.debug(taskReport.getTaskId() + " has " + attemptIds.size() + " attempts, checking on them..."); for (TaskAttemptID attemptId : attemptIds) { String xmlUrl = buildXmlUrl(taskReport, attemptId); Document taskAttemptXml; try { taskAttemptXml = RestClient.getInstance().getXml(xmlUrl); } catch (RestException e) { LOG.error("Error in connecting to REST api from " + xmlUrl, e); return false; } String errMsg = appConf.getTaskPolicy().checkTaskAttempt(appReport, taskType, taskReport, appConf, attemptId, taskAttemptXml, currTime); if (errMsg != null) killTaskAttempt(taskReport, taskType, attemptId, errMsg); else LOG.debug("LET the task " + attemptId + " run."); } return false; }