Example usage for org.apache.hadoop.mapred TaskCompletionEvent getTaskAttemptId

Introduction

This page collects example usages of org.apache.hadoop.mapred.TaskCompletionEvent.getTaskAttemptId() from open-source projects.

Prototype

public TaskAttemptID getTaskAttemptId() 

Document

Returns the ID of the task attempt that this completion event refers to.
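
Before the real-world examples, here is a minimal sketch of the typical call pattern. It is illustrative only: the TaskAttemptIdExample class is not from any of the quoted projects, and the job ID string is a placeholder. The pattern is to obtain a RunningJob from a JobClient, page through its completion events, and read each attempt's ID.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;

public class TaskAttemptIdExample {
    public static void main(String[] args) throws IOException {
        JobClient jobClient = new JobClient(new JobConf());
        // Placeholder job ID; substitute the ID of a job your cluster knows about.
        RunningJob job = jobClient.getJob(JobID.forName("job_201301010000_0001"));
        if (job == null) {
            return; // getJob returns null when the cluster does not know the job
        }

        int fromEventId = 0;
        TaskCompletionEvent[] events;
        do {
            // Completion events arrive in batches, starting at fromEventId.
            events = job.getTaskCompletionEvents(fromEventId);
            for (TaskCompletionEvent event : events) {
                // getTaskAttemptId() identifies the exact attempt the event refers to.
                System.out.println(event.getTaskAttemptId() + " -> " + event.getTaskStatus());
            }
            fromEventId += events.length;
        } while (events.length > 0);
    }
}

As the examples below show, the attempt ID is most often fed back into APIs such as RunningJob.getTaskDiagnostics or Cluster.getLogParams to pull diagnostics or logs for one specific attempt.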

Usage

From source file: boa.io.BoaOutputCommitter.java

License: Apache License
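
This abortJob override walks the job's completion events and, for each failed, killed, obsolete, or TIP-failed attempt, passes getTaskAttemptId() to getTaskDiagnostics to collect that attempt's diagnostic output.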

@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));
    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}

From source file: com.ibm.jaql.lang.expr.hadoop.Util.java

License: Apache License
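
This helper copies task syslogs into the main log, handing each event's attempt ID and tracker HTTP address to logTaskSyslogs; the onlySuccessful flag restricts it to successful attempts.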

public static void logAllTaskSyslogs(RunningJob rj, boolean onlySuccessful) throws Exception {
    String fetch = System.getProperty(FETCH_SYSLOG_PROP, "false");
    if (fetch.equals("false"))
        return;
    TaskCompletionEvent[] events = rj.getTaskCompletionEvents(0);
    for (TaskCompletionEvent event : events) {
        // skip unsuccessful attempts when only successful logs were requested
        if (onlySuccessful && event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED)
            continue;
        // print the syslog into the main log
        STATUS_LOG.info(event.toString());
        logTaskSyslogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
    }
}

From source file: com.liveramp.cascading_ext.flow.LoggingFlow.java

License: Apache License
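
This helper retrieves the full logs of a single attempt, identified by getTaskAttemptId(), and extracts the error message from them.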

private static String getFailureLog(TaskCompletionEvent event) {
    LOG.info("Getting errors for attempt " + event.getTaskAttemptId());
    String exception = "";
    try {/*from  ww w. ja  va 2  s .c  o m*/
        String fullLog = retrieveTaskLogs(event.getTaskAttemptId(), event.getTaskTrackerHttp());
        exception = extractErrorFromLogString(fullLog);
    } catch (IOException e) {
        LOG.info("Regex Error!", e);
    }
    return "\nCluster Log Exception:\n" + exception;
}

From source file: datafu.hourglass.jobs.StagedOutputJob.java

License: Apache License
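
This method writes job counters and task timing statistics to a file; near the end it uses getTaskAttemptId() to map each completion event back to its task and tally attempt outcomes by task type and status.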

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException if the counters file cannot be written
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : setupReports) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}

From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java

License: Apache License
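
This helper splits the log locations of unsuccessful attempts into map-side and reduce-side failures, keying each TaskLog on the event's tracker address and attempt ID.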

private Pair<ArrayList<TaskLog>, ArrayList<TaskLog>> getLogsFromCompletionEvents(TaskCompletionEvent[] events) {
    ArrayList<TaskLog> mapFailures = new ArrayList<TaskLog>();
    ArrayList<TaskLog> reduceFailures = new ArrayList<TaskLog>();
    for (TaskCompletionEvent event : events) {
        if (event.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
            TaskLog log = new TaskLog(event.getTaskTrackerHttp(), event.getTaskAttemptId());
            if (event.isMapTask())
                mapFailures.add(log);
            else
                reduceFailures.add(log);
        }
    }

    return new Pair<ArrayList<TaskLog>, ArrayList<TaskLog>>(mapFailures, reduceFailures);
}

From source file: org.apache.falcon.logging.TaskLogRetrieverYarn.java

License: Apache License
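
This retriever builds a YARN log-server URL for every task attempt of a job, passing getTaskAttemptId() to Cluster.getLogParams to resolve the attempt's container and application IDs.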

@Override
public List<String> retrieveTaskLogURL(String jobIdStr) throws IOException {
    List<String> taskLogUrls = new ArrayList<String>();
    Configuration conf = getConf();
    Cluster cluster = getCluster(conf);
    JobID jobID = JobID.forName(jobIdStr);
    if (jobID == null) {
        LOG.warn("External id for workflow action is null");
        return null;
    }

    if (conf.get(YARN_LOG_SERVER_URL) == null) {
        LOG.warn("YARN log Server is null");
        return null;
    }

    try {
        Job job = cluster.getJob(jobID);
        if (job != null) {
            TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
            for (TaskCompletionEvent event : events) {
                LogParams params = cluster.getLogParams(jobID, event.getTaskAttemptId());
                String url = (conf.get(YARN_LOG_SERVER_URL).startsWith(SCHEME) ? conf.get(YARN_LOG_SERVER_URL)
                        : SCHEME + conf.get(YARN_LOG_SERVER_URL)) + "/" + event.getTaskTrackerHttp() + "/"
                        + params.getContainerId() + "/" + params.getApplicationId() + "/" + params.getOwner()
                        + "?start=0";
                LOG.info("Task Log URL for the job {} is {}" + jobIdStr, url);
                taskLogUrls.add(url);
            }
            return taskLogUrls;
        }
        LOG.warn("Unable to find the job in cluster {}" + jobIdStr);
        return null;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}

From source file: org.apache.falcon.logging.v2.TaskLogRetrieverYarn.java

License: Apache License
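
A variant of the previous retriever that unconditionally prefixes the configured log-server address with the scheme and omits the null check on the configuration value.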

@Override
public List<String> retrieveTaskLogURL(String jobIdStr) throws IOException {
    List<String> taskLogUrls = new ArrayList<String>();
    Configuration conf = getConf();
    Cluster cluster = getCluster(conf);
    JobID jobID = JobID.forName(jobIdStr);
    if (jobID == null) {
        LOG.warn("External id for workflow action is null");
        return null;
    }
    try {
        Job job = cluster.getJob(jobID);
        if (job != null) {
            TaskCompletionEvent[] events = job.getTaskCompletionEvents(0);
            for (TaskCompletionEvent event : events) {
                LogParams params = cluster.getLogParams(jobID, event.getTaskAttemptId());
                String url = SCHEME + conf.get(YARN_LOG_SERVER_URL) + "/" + event.getTaskTrackerHttp() + "/"
                        + params.getContainerId() + "/" + params.getApplicationId() + "/" + params.getOwner()
                        + "?start=0";
                LOG.info("Task Log URL for the job {} is {}" + jobIdStr, url);
                taskLogUrls.add(url);
            }
            return taskLogUrls;
        }
        LOG.warn("Unable to find the job in cluster {}" + jobIdStr);
        return null;
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}

From source file: org.apache.falcon.logging.v2.TaskLogRetrieverYarnTest.java

License: Apache License
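
A Mockito-based data provider that fabricates completion events for a mocked job and computes the log URLs the retriever under test is expected to return.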

@DataProvider(name = "testData")
public Object[][] testData() throws IOException, InterruptedException {
    int samples = getRandomValueInRange(10) + 1;
    Object[][] resultSet = new Object[samples][2];
    for (int count = 0; count < samples; count++) {
        List<String> expectedResult = new ArrayList<String>();
        Cluster cluster = getCluster(getConf());
        String jobId = new JobID("job", RANDOM.nextInt(1000)).toString();
        boolean success = RANDOM.nextBoolean();
        JobID jobID = JobID.forName(jobId);
        int numEvents = getRandomValueInRange(10) + 1;
        TaskCompletionEvent[] events = getTaskCompletionEvents(numEvents, jobID);
        Job job = mock(Job.class);
        when(cluster.getJob(jobID)).thenReturn(job);
        when(job.getTaskCompletionEvents(0)).thenReturn(events);
        for (TaskCompletionEvent event : events) {
            if (success) {
                LogParams params = getLogParams();
                when(cluster.getLogParams(jobID, event.getTaskAttemptId())).thenReturn(params);
                String url = SCHEME + getConf().get(YARN_LOG_SERVER_URL) + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId() + "/" + params.getApplicationId() + "/"
                        + params.getOwner() + "?start=0";
                expectedResult.add(url);
            } else {
                when(cluster.getJob(jobID)).thenReturn(null);
                expectedResult = null;
                break; // no URLs can be produced once the job lookup returns null
            }
        }
        resultSet[count] = new Object[] { jobId, expectedResult };
    }
    return resultSet;
}

From source file: org.apache.falcon.oozie.logging.TaskLogRetrieverYarnTest.java

License: Apache License
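
A near-identical data provider for the relocated retriever; it breaks out of the event loop as soon as the job lookup is stubbed to return null.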

@DataProvider(name = "testData")
public Object[][] testData() throws IOException, InterruptedException {
    int samples = getRandomValueInRange(10) + 1;
    Object[][] resultSet = new Object[samples][2];
    for (int count = 0; count < samples; count++) {
        List<String> expectedResult = new ArrayList<String>();
        Cluster cluster = getCluster(getConf());
        String jobId = new JobID("job", count).toString();
        boolean success = random.nextBoolean();
        JobID jobID = JobID.forName(jobId);
        int numEvents = getRandomValueInRange(10) + 1;
        TaskCompletionEvent[] events = getTaskCompletionEvents(numEvents, jobID);
        Job job = mock(Job.class);
        when(cluster.getJob(jobID)).thenReturn(job);
        when(job.getTaskCompletionEvents(0)).thenReturn(events);
        for (TaskCompletionEvent event : events) {
            if (success) {
                LogParams params = getLogParams();
                when(cluster.getLogParams(jobID, event.getTaskAttemptId())).thenReturn(params);
                String url = SCHEME + getConf().get(YARN_LOG_SERVER_URL) + "/" + event.getTaskTrackerHttp()
                        + "/" + params.getContainerId() + "/" + params.getApplicationId() + "/"
                        + params.getOwner() + "?start=0";
                expectedResult.add(url);
            } else {
                when(cluster.getJob(jobID)).thenReturn(null);
                expectedResult = null;
                break;
            }
        }
        resultSet[count] = new Object[] { jobId, expectedResult };
    }
    return resultSet;
}

From source file: org.estado.core.JobStatusChecker.java

License: Apache License
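
This method converts each completion event into a TaskStatus record, using getTaskAttemptId() both as the task identifier and to build a tasklog URL for unsuccessful attempts.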

private List<TaskStatus> getTaskDetails(RunningJob job) {
    TaskCompletionEvent[] tasks = new TaskCompletionEvent[0];
    List<TaskStatus> taskStatusList = new ArrayList<TaskStatus>();
    try {
        tasks = job.getTaskCompletionEvents(0);

        for (TaskCompletionEvent task : tasks) {
            TaskStatus taskStatus = new TaskStatus();
            taskStatus.setTaskId(task.getTaskAttemptId().toString());
            taskStatus.setStatus(task.getTaskStatus().toString());
            taskStatus.setDuration(task.getTaskRunTime() * 1L); // widen int millis to long
            taskStatus.setTaskType(task.isMapTask() ? "Map" : "Reduce");
            if (!task.getTaskStatus().equals(TaskCompletionEvent.Status.SUCCEEDED)) {
                String url = task.getTaskTrackerHttp() + "/tasklog?attemptid=" + task.getTaskAttemptId()
                        + "&all=true";
                URLConnection connection = new URL(url).openConnection();
                connection.setDoOutput(true);
                connection.connect();
                // read the entire response; try-with-resources closes the stream
                try (Scanner s = new java.util.Scanner(connection.getInputStream()).useDelimiter("\\A")) {
                    taskStatus.setLog(s.hasNext() ? s.next() : "");
                }
            }
            taskStatusList.add(taskStatus);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return taskStatusList;
}