List of usage examples for org.apache.hadoop.mapred.TaskCompletionEvent.getTaskStatus()
public Status getTaskStatus()
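getTaskStatus() returns a TaskCompletionEvent.Status enum value: SUCCEEDED, FAILED, KILLED, OBSOLETE, or TIPFAILED. As a minimal sketch of the pattern the examples below keep repeating — assuming `job` is a RunningJob handle for a job that has already completed — the completion events can be fetched in pages and each attempt's status inspected:

    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TaskCompletionEvent;

    // Minimal sketch (not taken from any source file below): page through the
    // completion events of an already-completed RunningJob `job` and report
    // every attempt that did not succeed.
    int startFrom = 0;
    while (true) {
        TaskCompletionEvent[] events = job.getTaskCompletionEvents(startFrom);
        if (events.length == 0) {
            break; // a completed job eventually returns an empty page
        }
        for (TaskCompletionEvent event : events) {
            switch (event.getTaskStatus()) {
            case SUCCEEDED:
                break;
            case FAILED:
            case KILLED:
            case OBSOLETE:
            case TIPFAILED:
                System.err.println("Attempt " + event.getTaskAttemptId()
                        + " ended with status " + event.getTaskStatus());
                break;
            }
        }
        startFrom += events.length;
    }

The page-based loop matters for live jobs: new events keep arriving, so callers such as MapReduceEncoder and FileUtils below remember the running offset between polls.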
From source file: boa.io.BoaOutputCommitter.java
License: Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
From source file: cascading.flow.hadoop.HadoopStepStats.java
License: Open Source License
private void addTaskStats(TaskCompletionEvent[] events) {
    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED)
            getTaskStats().add(new HadoopTaskStats(event));
    }
}
From source file: com.ibm.jaql.lang.expr.hadoop.Util.java
License: Apache License
public static void logAllTaskSyslogs(RunningJob rj, boolean onlySuccessful) throws Exception {
    String fetch = System.getProperty(FETCH_SYSLOG_PROP, "false");
    if (fetch.equals("false"))
        return;

    TaskCompletionEvent[] events = rj.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : events) {
        // Note: both branches below are identical in the original source, so the
        // onlySuccessful flag has no effect here -- every event's syslog is
        // printed into the main log regardless of its status.
        if (onlySuccessful && (event.getTaskStatus() == TaskCompletionEvent.Status.SUCCEEDED)) {
            // print the syslog into the main log
            STATUS_LOG.info(event.toString());
            logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        } else {
            STATUS_LOG.info(event.toString());
            logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        }
    }
}
From source file: com.liveramp.cascading_ext.flow.LoggingFlow.java
License: Apache License
private String logJobErrors() {
    boolean exceptions = false;
    StringBuilder jobErrors = new StringBuilder();
    final String divider = StringUtils.repeat("-", 80);
    logAndAppend(jobErrors, divider);
    try {
        List<FlowStepStats> stepStats = getFlowStats().getFlowStepStats();
        Set<String> jobFailures = new HashSet<String>();
        for (FlowStepStats stat : stepStats) {
            try {
                RunningJob job = ((HadoopStepStats) stat).getRunningJob();
                TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
                ArrayList<TaskCompletionEvent> failures = new ArrayList<TaskCompletionEvent>();
                for (TaskCompletionEvent event : events) {
                    if (event.getTaskStatus() == Status.FAILED) {
                        failures.add(event);
                    }
                }
                // We limit the number of potential logs being pulled to spare the jobtracker.
                // The loop is bounded by failures.size() as well, since the original
                // unguarded loop could run past the end of the list.
                if (failures.size() > 0) {
                    Collections.shuffle(failures);
                    for (int i = 0; i < Math.min(FAILURES_TO_QUERY, failures.size()); i++) {
                        jobFailures.add(getFailureLog(failures.get(i)));
                    }
                }
            } catch (Exception e) {
                exceptions = true;
            }
        }
        if (exceptions) {
            logAndAppend(jobErrors, "unable to retrieve failures from all completed steps!");
            logAndAppend(jobErrors, "successfully retrieved job failures: " + StringUtils.join(jobFailures, ", "));
        } else {
            logAndAppend(jobErrors, "step attempt failures: " + StringUtils.join(jobFailures, ", "));
        }
    } catch (Exception e) {
        logAndAppend(jobErrors, "unable to retrieve any failures from steps");
        logAndAppend(jobErrors, e.toString());
    }
    logAndAppend(jobErrors, divider);
    return jobErrors.toString();
}
From source file: datafu.hourglass.jobs.StagedOutputJob.java
License: Apache License
/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId =
            new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(), jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java
License: Apache License
private Pair<ArrayList<TaskLog>, ArrayList<TaskLog>> getLogsFromCompletionEvents(TaskCompletionEvent[] events) {
    ArrayList<TaskLog> mapFailures = new ArrayList<TaskLog>();
    ArrayList<TaskLog> reduceFailures = new ArrayList<TaskLog>();

    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            TaskLog log = new TaskLog(event.getTaskTrackerHttp(), event.getTaskAttemptId());
            if (event.isMapTask())
                mapFailures.add(log);
            else
                reduceFailures.add(log);
        }
    }

    return new Pair<ArrayList<TaskLog>, ArrayList<TaskLog>>(mapFailures, reduceFailures);
}
From source file: io.hops.erasure_coding.MapReduceEncoder.java
License: Apache License
/**
 * Checks if the map-reduce job has completed.
 *
 * @return true if the job completed, false otherwise.
 * @throws java.io.IOException
 */
public boolean checkComplete() throws IOException {
    JobID jobID = runningJob.getID();
    if (runningJob.isComplete()) {
        // delete job directory
        final String jobdir = jobconf.get(JOB_DIR_LABEL);
        if (jobdir != null) {
            final Path jobpath = new Path(jobdir);
            jobpath.getFileSystem(jobconf).delete(jobpath, true);
        }
        if (runningJob.isSuccessful()) {
            LOG.info("Job Complete(Succeeded): " + jobID);
        } else {
            LOG.info("Job Complete(Failed): " + jobID);
        }
        cleanUp();
        return true;
    } else {
        String report = (" job " + jobID + " map "
                + StringUtils.formatPercent(runningJob.mapProgress(), 0) + " reduce "
                + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
        if (!report.equals(lastReport)) {
            LOG.info(report);
            lastReport = report;
        }
        TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(jobEventCounter);
        jobEventCounter += events.length;
        for (TaskCompletionEvent event : events) {
            if (event.getTaskStatus() == TaskCompletionEvent.Status.FAILED) {
                LOG.info(" Job " + jobID + " " + event.toString());
            }
        }
        return false;
    }
}
From source file: org.estado.core.JobStatusChecker.java
License: Apache License
private List<TaskStatus> getTaskDetails(RunningJob job) {
    TaskCompletionEvent[] tasks = new TaskCompletionEvent[0];
    List<TaskStatus> taskStatusList = new ArrayList<TaskStatus>();
    try {
        tasks = job.getTaskCompletionEvents(0);
        for (TaskCompletionEvent task : tasks) {
            TaskStatus taskStatus = new TaskStatus();
            taskStatus.setTaskId(task.getTaskAttemptId().toString());
            taskStatus.setStatus(task.getTaskStatus().toString());
            taskStatus.setDuration(task.getTaskRunTime() * 1L); // change to long
            taskStatus.setTaskType(task.isMapTask() ? "Map" : "Reduce");
            if (!task.getTaskStatus().equals(TaskCompletionEvent.Status.SUCCEEDED)) {
                String url = task.getTaskTrackerHttp() + "/tasklog?attemptid=" + task.getTaskAttemptId()
                        + "&all=true";
                URLConnection connection = new URL(url).openConnection();
                connection.setDoOutput(true);
                connection.connect();
                Scanner s = new java.util.Scanner(connection.getInputStream()).useDelimiter("\\A");
                String log = s.hasNext() ? s.next() : "";
                taskStatus.setLog(log);
            }
            taskStatusList.add(taskStatus);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return taskStatusList;
}
From source file: org.godhuli.rhipe.FileUtils.java
License: Apache License
public REXP getstatus(String jd, boolean geterrors) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    // cfg.addResource(new Path(jobfile));
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    double dura = ((double) System.currentTimeMillis() - startsec) / 1000;
    REXP ro = FileUtils.buildlistFromOldCounter(cc, dura);
    int jobs = rj.getJobState();
    String jobss = null;
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();
    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);
    int totalmaps = maptr.length, totalreds = redtr.length;
    int mappending = 0, redpending = 0, maprunning = 0, redrunning = 0, redfailed = 0, redkilled = 0,
            mapkilled = 0, mapfailed = 0, mapcomp = 0, redcomp = 0;
    for (int i = 0; i < maptr.length; i++) {
        TIPStatus t = maptr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            mapcomp++;
            break;
        case FAILED:
            mapfailed++;
            break;
        case PENDING:
            mappending++;
            break;
        case RUNNING:
            maprunning++;
            break;
        case KILLED:
            mapkilled++;
            break;
        }
    }
    for (int i = 0; i < redtr.length; i++) {
        TIPStatus t = redtr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            redcomp++;
            break;
        case FAILED:
            redfailed++;
            break;
        case PENDING:
            redpending++;
            break;
        case RUNNING:
            redrunning++;
            break;
        case KILLED:
            redkilled++;
            break;
        }
    }
    int reduceafails = 0, reduceakilled = 0, mapafails = 0, mapakilled = 0;
    int startfrom = 0;

    REXP.Builder errcontainer = REXP.newBuilder();
    errcontainer.setRclass(REXP.RClass.STRING);
    while (true) {
        org.apache.hadoop.mapred.TaskCompletionEvent[] events = rj.getTaskCompletionEvents(startfrom);
        for (int i = 0; i < events.length; i++) {
            org.apache.hadoop.mapred.TaskCompletionEvent e = events[i];
            switch (e.getTaskStatus()) {
            case KILLED:
                if (e.isMapTask()) {
                    mapakilled++;
                } else {
                    reduceakilled++;
                }
                break;
            case TIPFAILED:
            case FAILED:
                if (e.isMapTask()) {
                    mapafails++;
                } else {
                    reduceafails++;
                }
                if (geterrors) {
                    REXPProtos.STRING.Builder content = REXPProtos.STRING.newBuilder();
                    String[] s = rj.getTaskDiagnostics(e.getTaskAttemptId());
                    if (s != null && s.length > 0) {
                        content.setStrval(s[0]);
                        errcontainer.addStringValue(content.build());
                    }
                }
                break;
            }
        }
        startfrom += events.length;
        if (events.length == 0)
            break;
    }

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { dura }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalmaps, mappending, maprunning, mapcomp, mapkilled, mapafails, mapakilled }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalreds, redpending, redrunning, redcomp, redkilled, reduceafails, reduceakilled }));
    thevals.addRexpValue(ro);
    thevals.addRexpValue(errcontainer);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));
    return (thevals.build());
}
From source file: org.smartfrog.services.hadoop.mapreduce.submitter.SubmitterImpl.java
License: Open Source License
/**
 * Handle the end of the job
 *
 * @throws IOException on any failure
 */
private void processEndOfJob() throws IOException {
    boolean succeeded = runningJob.isSuccessful();
    int taskCount = 0;
    int failures = 0;
    String message = "Job " + runningJob.getJobName() + " ID=" + runningJob.getID().toString() + " has "
            + (succeeded ? " succeeded" : "failed");
    StringBuilder builder = new StringBuilder();
    TaskCompletionEvent[] history = runningJob.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : history) {
        taskCount++;
        builder.append(event.isMapTask() ? "\nMap: " : "\nReduce: ");
        builder.append(event.toString());
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            failures++;
            String[] diagnostics = runningJob.getTaskDiagnostics(event.getTaskAttemptId());
            for (String line : diagnostics) {
                builder.append("\n ");
                builder.append(line);
            }
        }
        builder.append("\n Tasks run :").append(taskCount).append(" failed: ").append(failures);
        if (!succeeded && dumpOnFailure) {
            builder.append("Job configuration used");
            builder.append(jobConf.dump());
        }
        message = message + builder.toString();
    }
    sfLog().info(message);
    if (terminateWhenJobFinishes) {
        TerminationRecord record = succeeded ? TerminationRecord.normal(message, sfCompleteNameSafe())
                : TerminationRecord.abnormal(message, sfCompleteNameSafe());
        new ComponentHelper(this).targetForTermination(record, false, false);
    }
}