Example usage for org.apache.hadoop.mapred JobClient getReduceTaskReports

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobClient#getReduceTaskReports, collected from open source projects.

Prototype

@Deprecated
public TaskReport[] getReduceTaskReports(String jobId) throws IOException 
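
Most of the examples below use the non-deprecated overload that takes an org.apache.hadoop.mapred.JobID rather than a String. As a minimal sketch of a call (the job id here is hypothetical, and a JobConf with your cluster settings on the classpath is assumed), fetching and printing the reduce task reports looks like this:

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskReport;

public class ReduceTaskReportExample {
    public static void main(String[] args) throws IOException {
        // Assumes core-site.xml/mapred-site.xml on the classpath point at the cluster.
        JobClient jobClient = new JobClient(new JobConf());

        // Hypothetical job id; substitute one from your own cluster.
        JobID jobId = JobID.forName("job_201201011200_0001");

        TaskReport[] reduceReports = jobClient.getReduceTaskReports(jobId);
        for (TaskReport report : reduceReports) {
            System.out.println(report.getTaskID() + " progress=" + report.getProgress());
        }
    }
}

TaskReport also exposes getStartTime(), getFinishTime(), getCurrentStatus(), and getCounters(), which several of the examples below lean on.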

Usage

From source file:azkaban.jobtype.AzkabanPigListener.java

License:Apache License

@SuppressWarnings("deprecation")
private void addMapReduceJobState(PigJobDagNode node) {
    JobClient jobClient = PigStats.get().getJobClient();

    try {
        RunningJob runningJob = jobClient.getJob(node.getJobId());
        if (runningJob == null) {
            logger.warn("Couldn't find job status for jobId=" + node.getJobId());
            return;
        }

        JobID jobID = runningJob.getID();
        TaskReport[] mapTaskReport = jobClient.getMapTaskReports(jobID);
        TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobID);
        node.setMapReduceJobState(new MapReduceJobState(runningJob, mapTaskReport, reduceTaskReport));

        if (node.getJobConfiguration() == null) {
            Properties jobConfProperties = StatsUtils.getJobConf(runningJob);
            if (jobConfProperties != null && jobConfProperties.size() > 0) {
                node.setJobConfiguration(jobConfProperties);
            }
        }
    } catch (IOException e) {
        logger.error("Error getting job info.", e);
    }
}

From source file:cascading.flow.hadoop.HadoopStepStats.java

License:Open Source License

@Override
public void captureDetail() {
    getTaskStats().clear();

    JobClient jobClient = getJobClient();

    try {
        addTaskStats(HadoopTaskStats.TaskType.SETUP, jobClient.getSetupTaskReports(getRunningJob().getID()),
                true);
        addTaskStats(HadoopTaskStats.TaskType.MAPPER, jobClient.getMapTaskReports(getRunningJob().getID()),
                false);
        addTaskStats(HadoopTaskStats.TaskType.REDUCER, jobClient.getReduceTaskReports(getRunningJob().getID()),
                false);
        addTaskStats(HadoopTaskStats.TaskType.CLEANUP, jobClient.getCleanupTaskReports(getRunningJob().getID()),
                true);

        int count = 0;

        while (true) {
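            // getTaskCompletionEvents returns events in fixed-size batches
            // (10 at a time in the old mapred API), hence the offset advances by 10.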
            TaskCompletionEvent[] events = getRunningJob().getTaskCompletionEvents(count);

            if (events.length == 0)
                break;

            addTaskStats(events);
            count += 10;
        }
    } catch (IOException exception) {
        LOG.warn("unable to get task stats", exception);
    }
}

From source file:co.cask.cdap.app.mapreduce.MRJobClient.java

License:Apache License

/**
 * @param runId for which information will be returned.
 * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
 * @throws IOException if there is failure to communicate through the JobClient.
 * @throws NotFoundException if a Job with the given runId is not found.
 */
public MRJobInfo getMRJobInfo(Id.Run runId) throws IOException, NotFoundException {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

    JobClient jobClient = new JobClient(hConf);
    JobStatus[] jobs = jobClient.getAllJobs();

    JobStatus thisJob = findJobForRunId(jobs, runId);

    RunningJob runningJob = jobClient.getJob(thisJob.getJobID());
    if (runningJob == null) {
        throw new IllegalStateException(String.format("JobClient returned null for RunId: '%s', JobId: '%s'",
                runId, thisJob.getJobID()));
    }
    Counters counters = runningJob.getCounters();

    TaskReport[] mapTaskReports = jobClient.getMapTaskReports(thisJob.getJobID());
    TaskReport[] reduceTaskReports = jobClient.getReduceTaskReports(thisJob.getJobID());

    return new MRJobInfo(runningJob.mapProgress(), runningJob.reduceProgress(),
            groupToMap(counters.getGroup(TaskCounter.class.getName())), toMRTaskInfos(mapTaskReports),
            toMRTaskInfos(reduceTaskReports), true);
}

From source file:com.atlantbh.jmeter.plugins.hadooputilities.jobstatistics.TaskLayer.java

License:Apache License

public String getTaskLevelCountersByJobId(String jobTracker, String jobId) throws IOException {
    StringBuilder taskCounters = new StringBuilder();

    JobID id = this.convertToJobId(jobId);
    JobClient client = this.prepareJobClient(jobTracker);
    RunningJob job = client.getJob(id);
    TaskReport[] mapTaskReports = client.getMapTaskReports(id);
    TaskReport[] reduceTaskReports = client.getReduceTaskReports(id);

    taskCounters.append("<job id='").append(jobId).append("' name='").append(job.getJobName()).append("'>\n");
    taskCounters.append(" <mapTasks>\n");

    for (TaskReport mapTaskReport : mapTaskReports) {
        taskCounters.append("  <task id='").append(mapTaskReport.getTaskID().toString()).append("'\n");
        taskCounters.append("   <counters>\n");

        Counters counter = mapTaskReport.getCounters();

        Iterator<Group> iter = counter.iterator();

        while (iter.hasNext()) {
            Group group = iter.next();

            Iterator<Counter> cIter = group.iterator();
            while (cIter.hasNext()) {
                Counter c = cIter.next();
                taskCounters.append("    <counter name='").append(c.getDisplayName()).append("' value='")
                        .append(c.getValue()).append("'/>\n");
            }
        }

        taskCounters.append("   </counters>\n");
        taskCounters.append("  </task>\n");
    }

    taskCounters.append(" </mapTasks>\n");

    taskCounters.append(" <reduceTasks>\n");

    for (TaskReport reduceTaskReport : reduceTaskReports) {
        taskCounters.append("  <task id='").append(reduceTaskReport.getTaskID().toString()).append("'\n");
        taskCounters.append("   <counters>\n");

        Counters counter = reduceTaskReport.getCounters();

        Iterator<Group> iter = counter.iterator();

        while (iter.hasNext()) {
            Group group = iter.next();

            Iterator<Counter> cIter = group.iterator();
            while (cIter.hasNext()) {
                Counter c = cIter.next();
                taskCounters.append("    <counter name='").append(c.getDisplayName()).append("' value='")
                        .append(c.getValue()).append("'/>\n");
            }
        }

        taskCounters.append("   </counters>\n");
        taskCounters.append("  </task>\n");
    }

    taskCounters.append(" </reduceTasks>\n");
    taskCounters.append("</job>");

    return taskCounters.toString();
}

From source file:com.atlantbh.jmeter.plugins.hadooputilities.jobstatistics.TaskLayer.java

License:Apache License

public String getTaskStatisticsByJobId(String jobTracker, String jobId) throws IOException {
    StringBuilder taskStatistics = new StringBuilder();
    long taskDuration;
    String duration;

    JobID id = this.convertToJobId(jobId);
    JobClient client = this.prepareJobClient(jobTracker);
    RunningJob job = client.getJob(id);

    TaskReport[] mapTaskReports = client.getMapTaskReports(id);
    TaskReport[] reduceTaskReports = client.getReduceTaskReports(id);

    taskStatistics.append("<job id='").append(jobId).append("' name='").append(job.getJobName()).append("'>\n");
    taskStatistics.append(" <mapTasks>\n");

    for (TaskReport mapTaskReport : mapTaskReports) {
        taskDuration = mapTaskReport.getFinishTime() - mapTaskReport.getStartTime();

        if (taskDuration < 0) {
            duration = "N/A";
        } else {
            duration = String.valueOf(taskDuration);
        }

        double progress = mapTaskReport.getProgress() * 100;
        String taskProgress = Double.toString(progress) + "%";

        taskStatistics.append("  <task id='").append(mapTaskReport.getTaskID().toString()).append("'\n");
        taskStatistics.append("   <progress>").append(taskProgress).append("</progress>\n");
        taskStatistics.append("   <duration>").append(duration).append("</duration>\n");
        taskStatistics.append("   <status>").append(mapTaskReport.getCurrentStatus().toString())
                .append("</status>\n");
        taskStatistics.append("  </task>\n");
    }

    taskStatistics.append(" </mapTasks>\n");

    taskStatistics.append(" <reduceTasks>\n");

    for (TaskReport reduceTaskReport : reduceTaskReports) {
        taskDuration = reduceTaskReport.getFinishTime() - reduceTaskReport.getStartTime();

        if (taskDuration < 0) {
            duration = "N/A";
        } else {
            duration = String.valueOf(taskDuration);
        }

        double progress = reduceTaskReport.getProgress() * 100;
        String taskProgress = Double.toString(progress) + "%";

        taskStatistics.append("  <task id='").append(reduceTaskReport.getTaskID().toString()).append("'\n");
        taskStatistics.append("   <progress>").append(taskProgress).append("</progress>\n");
        taskStatistics.append("   <duration>").append(duration).append("</duration>\n");
        taskStatistics.append("   <status>").append(reduceTaskReport.getCurrentStatus().toString())
                .append("</status>\n");
        taskStatistics.append("  </task>\n");
    }

    taskStatistics.append(" </reduceTasks>\n");
    taskStatistics.append("</job>");

    return taskStatistics.toString();
}

From source file:com.netflix.lipstick.pigtolipstick.BasicP2LClient.java

License:Apache License

protected void updatePlanStatusForCompletedJobId(P2jPlanStatus planStatus, String jobId) {
    LOG.info("Updating plan status for completed job " + jobId);
    updatePlanStatusForJobId(planStatus, jobId);
    JobClient jobClient = PigStats.get().getJobClient();
    JobID jobID = JobID.forName(jobId);
    long startTime = Long.MAX_VALUE;
    long finishTime = Long.MIN_VALUE;
    /* The JobClient doesn't expose a way to get the Start and Finish time
       of the over all job[1] sadly, so we're pulling out the min task start
       time and max task finish time and using these to approximate.
            
       [1] - Which is really dumb.  The data obviously exists, it gets rendered
       in the job tracker via the JobInProgress but sadly this is internal
       to the remote job tracker so we don't have access to this
       information. */
    try {
        List<TaskReport> reports = Lists.newArrayList();
        reports.addAll(Arrays.asList(jobClient.getMapTaskReports(jobID)));
        reports.addAll(Arrays.asList(jobClient.getReduceTaskReports(jobID)));
        reports.addAll(Arrays.asList(jobClient.getCleanupTaskReports(jobID)));
        reports.addAll(Arrays.asList(jobClient.getSetupTaskReports(jobID)));
        for (TaskReport rpt : reports) {
            /* rpt.getStartTime() sometimes returns zero meaning it does
               not know what time it started so we need to prevent using
               this or we'll lose the actual lowest start time */
            long taskStartTime = rpt.getStartTime();
            if (0 != taskStartTime) {
                startTime = Math.min(startTime, taskStartTime);
            }
            finishTime = Math.max(finishTime, rpt.getFinishTime());
        }
        P2jJobStatus jobStatus = jobIdToJobStatusMap.get(jobId);
        if (startTime < Long.MAX_VALUE) {
            jobStatus.setStartTime(startTime);
        }
        if (finishTime > Long.MIN_VALUE) {
            jobStatus.setFinishTime(finishTime);
        }
        LOG.info("Determined start and finish times for job " + jobId);
    } catch (IOException e) {
        LOG.error("Error getting job info.", e);
    }

}

From source file:com.netflix.lipstick.pigtolipstick.BasicP2LClient.java

License:Apache License

/**
 * Build a P2jJobStatus object for the map/reduce job with id jobId.
 *
 * @param jobId the id of the map/reduce job
 * @return the newly created P2jJobStatus
 */
@SuppressWarnings("deprecation")
protected P2jJobStatus buildJobStatusMap(String jobId) {
    JobClient jobClient = PigStats.get().getJobClient();
    P2jJobStatus js = jobIdToJobStatusMap.get(jobId);

    try {
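        // JobClient.getJob(String) is deprecated, hence the @SuppressWarnings above.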
        RunningJob rj = jobClient.getJob(jobId);
        if (rj == null) {
            LOG.warn("Couldn't find job status for jobId=" + jobId);
            return js;
        }

        JobID jobID = rj.getID();
        Counters counters = rj.getCounters();
        Map<String, P2jCounters> cMap = Maps.newHashMap();
        for (Group g : counters) {
            P2jCounters countersObj = new P2jCounters();
            cMap.put(g.getDisplayName(), countersObj);
            for (Counter c : g) {
                countersObj.getCounters().put(c.getDisplayName(), c.getValue());
            }
        }

        js.setCounters(cMap);
        TaskReport[] mapTaskReport = jobClient.getMapTaskReports(jobID);
        TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobID);
        js.setJobName(rj.getJobName());
        js.setTrackingUrl(rj.getTrackingURL());
        js.setIsComplete(rj.isComplete());
        js.setIsSuccessful(rj.isSuccessful());
        js.setMapProgress(rj.mapProgress());
        js.setReduceProgress(rj.reduceProgress());
        js.setTotalMappers(mapTaskReport.length);
        js.setTotalReducers(reduceTaskReport.length);
        return js;
    } catch (IOException e) {
        LOG.error("Error getting job info.", e);
    }

    return null;
}

From source file:com.netflix.lipstick.warnings.JobWarnings.java

License:Apache License

public List<ReducerDuration> enumerateReducerRunTimesAccending(JobClient jobClient, String jobId) {
    if (!jobClient.getConf().getBoolean("pig.stats.notaskreport", false)) {
        try {
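            // This passes the String job id, i.e. the deprecated overload
            // shown in the prototype above.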
            TaskReport[] reduceTasks = jobClient.getReduceTaskReports(jobId);
            return enumerateReducerRunTimesAccending(reduceTasks);
        } catch (IOException e) {
            log.error("Error getting reduce task reports, continuing", e);
        }
    } else {
        log.info("Skipping reduce task reports for job " + jobId);
    }
    return Lists.newArrayList();
}

From source file:com.twitter.ambrose.model.hadoop.MapReduceHelper.java

License:Apache License

private MapReduceJobState getMapReduceJobState(MapReduceJob job, JobClient jobClient) {
    try {
        RunningJob runningJob = jobClient.getJob(JobID.forName(job.getId()));
        if (runningJob == null) {
            log.warn("Couldn't find job status for jobId: " + job.getId());
            return null;
        }
        JobID jobID = runningJob.getID();
        TaskReport[] mapTaskReport = jobClient.getMapTaskReports(jobID);
        TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobID);

        return new MapReduceJobState(runningJob, mapTaskReport, reduceTaskReport);

    } catch (Exception e) {
        log.warn("Couldn't get job state for jobId: " + job.getId(), e);
    }
    return null;
}

From source file:datafu.hourglass.jobs.StagedOutputJob.java

License:Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException if the counters file cannot be written
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

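    // JobClient's task-report methods take the old-API org.apache.hadoop.mapred.JobID,
    // so convert from the new-API JobID used by this Job.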
    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}