Example usage for org.apache.hadoop.mapred TaskReport getStartTime

List of usage examples for org.apache.hadoop.mapred TaskReport getStartTime


On this page you can find example usages of org.apache.hadoop.mapred.TaskReport.getStartTime().


public long getStartTime() 

Source Link


Get start time of task.


From source file:azkaban.jobtype.MapReduceJobState.java

License:Apache License

/**
 * Builds a snapshot of a MapReduce job's state from the running job and its
 * map and reduce task reports.
 *
 * NOTE(review): this excerpt is truncated — several closing braces and the
 * bodies of the status checks are missing; restore them from the upstream
 * file before relying on this code.
 *
 * @param runningJob the job whose id, name, tracking URL, completion flags,
 *            progress, failure info and counters are copied
 * @param mapTaskReport reports for the job's map tasks
 * @param reduceTaskReport reports for the job's reduce tasks
 * @throws IOException declared by the signature; presumably raised by the
 *             RunningJob accessors — confirm
 */
public MapReduceJobState(RunningJob runningJob, TaskReport[] mapTaskReport, TaskReport[] reduceTaskReport)
        throws IOException {
    jobId = runningJob.getID().toString();
    jobName = runningJob.getJobName();
    trackingURL = runningJob.getTrackingURL();
    isComplete = runningJob.isComplete();
    isSuccessful = runningJob.isSuccessful();
    mapProgress = runningJob.mapProgress();
    reduceProgress = runningJob.reduceProgress();
    failureInfo = runningJob.getFailureInfo();

    totalMappers = mapTaskReport.length;
    totalReducers = reduceTaskReport.length;

    for (TaskReport report : mapTaskReport) {
        // Keep the smallest map-task start time; a jobStartTime of 0 is treated as "not set yet".
        if (report.getStartTime() < jobStartTime || jobStartTime == 0L) {
            jobStartTime = report.getStartTime();

        TIPStatus status = report.getCurrentStatus();
        // Tasks that are neither PENDING nor RUNNING; the branch body is missing from this excerpt.
        if (status != TIPStatus.PENDING && status != TIPStatus.RUNNING) {

    for (TaskReport report : reduceTaskReport) {
        // Keep the largest reduce-task finish time.
        if (jobLastUpdateTime < report.getFinishTime()) {
            jobLastUpdateTime = report.getFinishTime();

        TIPStatus status = report.getCurrentStatus();
        // Same check as for the map tasks; the branch body is missing from this excerpt
        // (presumably it counts finished reducers, see finishedReducersCount below — confirm).
        if (status != TIPStatus.PENDING && status != TIPStatus.RUNNING) {

    // If not all the reducers are finished, or no finish time was seen,
    // fall back to the current wall-clock time.
    if (finishedReducersCount != reduceTaskReport.length || jobLastUpdateTime == 0) {
        jobLastUpdateTime = System.currentTimeMillis();

    counters = runningJob.getCounters();

From source file:co.cask.cdap.app.mapreduce.MRJobClient.java

License:Apache License

/**
 * Converts Hadoop task reports into a list of MRTaskInfo objects, one per report,
 * built from the report's task id, state, start time, finish time and progress.
 *
 * NOTE(review): the MRTaskInfo constructor call is cut off after getProgress() —
 * the remaining argument(s) and closing parentheses/braces are missing from this excerpt.
 *
 * @param taskReports the reports to convert
 * @return a new list holding one MRTaskInfo per report
 */
private List<MRTaskInfo> toMRTaskInfos(TaskReport[] taskReports) {
    List<MRTaskInfo> taskInfos = Lists.newArrayList();

    for (TaskReport taskReport : taskReports) {
        taskInfos.add(new MRTaskInfo(taskReport.getTaskId(), taskReport.getState(), taskReport.getStartTime(),
                taskReport.getFinishTime(), taskReport.getProgress(),
    }
    return taskInfos;

From source file:com.atlantbh.jmeter.plugins.hadooputilities.jobstatistics.TaskLayer.java

License:Apache License

/**
 * Builds an XML-like text report with the progress, duration and status of
 * every map and reduce task of a job.
 *
 * NOTE(review): this excerpt is truncated — closing braces are missing and the
 * status append statements are cut off mid-expression.
 *
 * @param jobTracker passed to prepareJobClient to obtain the JobClient
 * @param jobId job identifier, converted with convertToJobId
 * @return the assembled report text
 * @throws IOException declared by the signature; presumably raised by the JobClient calls — confirm
 */
public String getTaskStatisticsByJobId(String jobTracker, String jobId) throws IOException {
    StringBuilder taskStatistics = new StringBuilder();
    long taskDuration;
    String duration;

    JobID id = this.convertToJobId(jobId);
    JobClient client = this.prepareJobClient(jobTracker);
    RunningJob job = client.getJob(id);

    TaskReport[] mapTaskReports = client.getMapTaskReports(id);
    TaskReport[] reduceTaskReports = client.getReduceTaskReports(id);

    taskStatistics.append("<job id='").append(jobId).append("' name='").append(job.getJobName()).append("'>\n");
    taskStatistics.append(" <mapTasks>\n");

    for (TaskReport mapTaskReport : mapTaskReports) {
        // Duration is finish time minus start time, in whatever unit TaskReport reports.
        taskDuration = mapTaskReport.getFinishTime() - mapTaskReport.getStartTime();

        // A negative difference is reported as "N/A"
        // (presumably the task has no finish time yet — confirm).
        if (taskDuration < 0) {
            duration = "N/A";
        } else {
            duration = String.valueOf(taskDuration);

        // Progress is scaled by 100 and rendered with a trailing '%'.
        double progress = mapTaskReport.getProgress() * 100;
        String taskProgress = Double.toString(progress) + "%";

        // NOTE(review): the opening <task ...' tag is emitted without a closing '>' — confirm whether intended.
        taskStatistics.append("  <task id='").append(mapTaskReport.getTaskID().toString()).append("'\n");
        taskStatistics.append("   <progress>").append(taskProgress).append("</progress>\n");
        taskStatistics.append("   <duration>").append(duration).append("</duration>\n");
        // NOTE(review): statement below is cut off in this excerpt (no closing tag append, no ';').
        taskStatistics.append("   <status>").append(mapTaskReport.getCurrentStatus().toString())
        taskStatistics.append("  </task>\n");

    taskStatistics.append(" </mapTasks>\n");

    taskStatistics.append(" <reduceTasks>\n");

    // Same per-task rendering as above, applied to the reduce tasks.
    for (TaskReport reduceTaskReport : reduceTaskReports) {
        taskDuration = reduceTaskReport.getFinishTime() - reduceTaskReport.getStartTime();

        if (taskDuration < 0) {
            duration = "N/A";
        } else {
            duration = String.valueOf(taskDuration);

        double progress = reduceTaskReport.getProgress() * 100;
        String taskProgress = Double.toString(progress) + "%";

        taskStatistics.append("  <task id='").append(reduceTaskReport.getTaskID().toString()).append("'\n");
        taskStatistics.append("   <progress>").append(taskProgress).append("</progress>\n");
        taskStatistics.append("   <duration>").append(duration).append("</duration>\n");
        // NOTE(review): statement below is cut off in this excerpt (no closing tag append, no ';').
        taskStatistics.append("   <status>").append(reduceTaskReport.getCurrentStatus().toString())
        taskStatistics.append("  </task>\n");

    taskStatistics.append(" </reduceTasks>\n");

    return taskStatistics.toString();

From source file:com.impetus.ankush2.hadoop.monitor.JobStatusProvider.java

License:Open Source License

/**
 * Gets the task report.
 *
 * @param taskReports
 *            the task reports
 * @return the task report
 */
private Map<String, Object> getTaskReport(TaskReport[] taskReports) {
    // Summarises the given task reports into a map: per-status counters, a list of
    // per-task detail maps ("tasks") and diagnostic messages ("diagInfo").
    // NOTE(review): this excerpt is truncated — closing braces, the counter increments
    // inside the status checks and the list-add calls are missing.
    Map<String, Object> taskReportsInfo = new HashMap<String, Object>();
    try {
        // NOTE(review): taskReports is dereferenced here, before the null check below;
        // a null argument would throw and end up in the catch block.
        LOG.info("Total Task : " + taskReports.length);
        List<Map> taskLists = new ArrayList<Map>();
        // A report on the state of a task.
        if (taskReports != null) {
            int completeTask = 0;
            int failedTask = 0;
            int killedTask = 0;
            int runningTask = 0;
            int pendingTask = 0;
            // Diagnostic entries keyed by "<taskId>_<index>"; each value is {taskId, message}.
            Map<String, Object[]> diagInfo = new HashMap<String, Object[]>();
            // Iterating over the task reports
            for (TaskReport mtr : taskReports) {
                // Creating an empty map for storing task details
                Map<String, Object> taskReport = new HashMap<String, Object>();
                // The current status of the task
                TIPStatus currentStatus = mtr.getCurrentStatus();
                // Checking for task's current status COMPLETE
                if (currentStatus == TIPStatus.COMPLETE) {
                // Checking for task's current status KILLED
                if (currentStatus == TIPStatus.KILLED) {
                // Checking for task's current status RUNNING
                if (currentStatus == TIPStatus.RUNNING) {
                // Checking for task's current status PENDING
                if (currentStatus == TIPStatus.PENDING) {
                // The id of the task.
                TaskID taskId = mtr.getTaskID();
                float progress = mtr.getProgress();
                // The most recent state
                String state = mtr.getState();

                // Putting value in a map
                taskReport.put("taskId", taskId.toString());
                taskReport.put("successfulTaskAttemp", mtr.getSuccessfulTaskAttempt().toString());
                taskReport.put("startTime", mtr.getStartTime());
                taskReport.put("finishTime", mtr.getFinishTime());
                // Progress is stored scaled by 100.
                taskReport.put("progress", progress * 100);
                taskReport.put("state", state);
                taskReport.put("currentStatus", currentStatus);
                // Flatten the task's counters into a list of {name, subCounters} maps.
                Counters counters = mtr.getCounters();
                List countersList = new ArrayList();
                for (Group group : counters) {
                    Map<String, Object> counterMap = new HashMap<String, Object>();
                    counterMap.put("name", group.getDisplayName());
                    List subCounters = new ArrayList();
                    for (Counter counter : group) {
                        Map subCounter = new HashMap();
                        subCounter.put("name", counter.getDisplayName());
                        subCounter.put("value", counter.getCounter());
                    counterMap.put("subCounters", subCounters);
                taskReport.put("counters", countersList);
                // A list of error messages.
                String[] diagnostics = mtr.getDiagnostics();
                if (diagnostics != null) {
                    int count = 0;
                    // Iterating over the list of error messages
                    for (String di : diagnostics) {
                        Object[] diagStatus = new Object[2];
                        diagStatus[0] = taskId;
                        diagStatus[1] = di;
                        diagInfo.put(taskId + "_" + count, diagStatus);
            // Putting value in a map
            taskReportsInfo.put("completedTask", completeTask);
            taskReportsInfo.put("pendingTask", pendingTask);
            taskReportsInfo.put("killedTask", killedTask);
            taskReportsInfo.put("runningTask", runningTask);
            taskReportsInfo.put("failedTask", failedTask);
            // NOTE(review): "failedOrKilledTask" is populated with failedTask only — confirm
            // whether killedTask should be included.
            taskReportsInfo.put("failedOrKilledTask", failedTask);
            taskReportsInfo.put("diagInfo", diagInfo);
            taskReportsInfo.put("tasks", taskLists);
    } catch (Exception e) {
        // Any failure is logged via HadoopUtils; the map built so far is still returned below.
        HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, "Could not get task report",
                Constant.Component.Name.HADOOP, e);
    return taskReportsInfo;

From source file:com.netflix.lipstick.pigtolipstick.BasicP2LClient.java

License:Apache License

/**
 * Updates the plan status for a completed job and approximates the job's start
 * and finish times from its task reports (minimum task start time, maximum task
 * finish time).
 *
 * NOTE(review): this excerpt is truncated — closing braces are missing, the
 * statements that fill {@code reports} are absent (as shown, the list is empty
 * and the loop never runs), and the bodies that apply the computed times to
 * {@code jobStatus} are missing.
 *
 * @param planStatus the plan status to update
 * @param jobId string form of the Hadoop job id
 */
protected void updatePlanStatusForCompletedJobId(P2jPlanStatus planStatus, String jobId) {
    LOG.info("Updating plan status for completed job " + jobId);
    updatePlanStatusForJobId(planStatus, jobId);
    JobClient jobClient = PigStats.get().getJobClient();
    JobID jobID = JobID.forName(jobId);
    // Sentinels: MAX_VALUE / MIN_VALUE mean "no task time observed yet" (checked below).
    long startTime = Long.MAX_VALUE;
    long finishTime = Long.MIN_VALUE;
    /* The JobClient doesn't expose a way to get the Start and Finish time
       of the over all job[1] sadly, so we're pulling out the min task start
       time and max task finish time and using these to approximate.
       [1] - Which is really dumb.  The data obviously exists, it gets rendered
       in the job tracker via the JobInProgress but sadly this is internal
       to the remote job tracker so we don't have access to this
       information. */
    try {
        List<TaskReport> reports = Lists.newArrayList();
        for (TaskReport rpt : reports) {
            /* rpt.getStartTime() sometimes returns zero meaning it does
               not know what time it started so we need to prevent using
               this or we'll lose the actual lowest start time */
            long taskStartTime = rpt.getStartTime();
            if (0 != taskStartTime) {
                startTime = Math.min(startTime, taskStartTime);
            finishTime = Math.max(finishTime, rpt.getFinishTime());
        P2jJobStatus jobStatus = jobIdToJobStatusMap.get(jobId);
        // Only apply times that were actually observed (sentinels unchanged means none were).
        if (startTime < Long.MAX_VALUE) {
        if (finishTime > Long.MIN_VALUE) {
        LOG.info("Determined start and finish times for job " + jobId);
    } catch (IOException e) {
        LOG.error("Error getting job info.", e);


From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

/**
 * Test helper: mocks a TaskReport whose start time lies {@code durationMin * MIN}
 * before {@code now}, configures the app limits, runs
 * {@code appStatusChecker.checkTask} and verifies the result and the kill calls.
 *
 * NOTE(review): this excerpt is truncated — the final closing brace and at
 * least one {@code else} are missing (both assertEquals calls appear under the
 * same {@code if}), and both kill branches show the same verify call, so any
 * update to killCounter is presumably among the missing lines — confirm.
 *
 * @param taskType passed through to checkTask
 * @param confParamName configuration key that receives MAX_RUN and the enforce flag
 * @param durationMin how long ago the mocked task started, in units of MIN
 * @param MAX_RUN value stored under confParamName
 * @param progress passed to setTaskAttemptXML
 * @param enforce stored under HadoopJobMonitorConfiguration.enforced(confParamName)
 * @param dryRun stored under HadoopJobMonitorConfiguration.DRY_RUN
 * @param status not used in the visible excerpt
 * @param wellBahaved expected return value of checkTask
 * @param killed selects which kill verification branch runs
 * @return the value returned by checkTask
 * @throws Exception declared by the signature
 */
public boolean testTask(TaskType taskType, String confParamName, long durationMin, final int MAX_RUN,
        float progress, boolean enforce, boolean dryRun, TIPStatus status, boolean wellBahaved, boolean killed)
        throws Exception {
    setTaskAttemptXML(durationMin * MIN, progress);

    TaskReport taskReport = mock(TaskReport.class);
    Collection<TaskAttemptID> attempts = new ArrayList<TaskAttemptID>();
    attempts.add(taskAttemptId);

    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, dryRun);
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(confParamName, MAX_RUN);
    remoteAppConf.setBoolean(HadoopJobMonitorConfiguration.enforced(confParamName), enforce);
    // The mocked task reports a start time durationMin * MIN before "now".
    when(taskReport.getStartTime()).thenReturn(now - durationMin * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);

    boolean res = appStatusChecker.checkTask(taskType, taskReport, now);

    if (wellBahaved)
        assertEquals("Well-bahved task does not pass the check", wellBahaved, res);
        assertEquals("Not Well-bahved task passes the check", wellBahaved, res);
    if (killed) {
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    } else
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    return res;

From source file:datafu.hourglass.jobs.StagedOutputJob.java

License:Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system the counters file is written to
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    // Writes the job's counters plus task timing statistics as name=value lines
    // to a ".counters.<timestamp>" file.
    // NOTE(review): this excerpt is truncated — many closing braces are missing, the
    // JobID construction below is cut off, and the "skip" branches have lost whatever
    // statement followed the warning.
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    // Timestamp suffix (yyyyMMddHHmmss) that makes the counters file name unique.
    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    // Prefer the configured counters parent path (created if absent); otherwise
    // place the file under the job's output path.
    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    // Every counter of every group is written as "<name>=<value>".
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));

    JobID jobID = this.getJobID();

    // NOTE(review): statement below is cut off in this excerpt (second constructor argument missing).
    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),

    // Boundary times; Long.MAX_VALUE / 0 act as "not observed" sentinels and are checked further down.
    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    // Task id -> task type ("SETUP", "MAP", "REDUCE", "CLEANUP"), used to label attempt statistics.
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        // NOTE(review): the reports are fetched a second time here instead of reusing setupReports.
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
            setupStart = Math.min(setupStart, report.getStartTime());
    } else {
        _log.error("No setup reports");

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
            // The map-reduce start time is taken from map tasks only.
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
    } else {
        _log.error("No map reports");

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
            // The map-reduce finish time is taken from reduce tasks only.
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
    } else {
        _log.error("No reduce reports");

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
    } else {
        _log.error("No cleanup reports");

    // Any boundary time still at its sentinel marks the statistics as unusable.
    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        // Count attempts per "<STATUS>_<TYPE>_ATTEMPTS" key, starting each key at 0.
        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        attemptStats.put(key, attemptStats.get(key) + 1);

    // Timing statistics are written only when all boundary times were determined.
    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        // DescriptiveStatistics values are cast to long to fit the %d format.
        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));


From source file:dataload.LogFetchJobTracker.java

License:Apache License

/**
 * Inserts the given task reports into the table.
 *
 * @param prepStatement
 * @param reports
 * @param id
 * @throws SQLException
 */
public void insertTaskIntoTable(PreparedStatement prepStatement, TaskReport[] reports, JobID id)
        throws SQLException {
    // For each task report, prepares a 25-column INSERT into the table named after the job id
    // and fills it from the report's task id, duration and counters.
    // NOTE(review): this excerpt is truncated — closing braces are missing and no execute/close
    // of the statement is visible.
    for (TaskReport rep : reports) {
        Counters c = rep.getCounters();
        Iterator<Counters.Group> itrG = c.iterator();

        // NOTE(review): the prepStatement parameter is overwritten here, and the table name is
        // concatenated into the SQL text from id — confirm id can never carry untrusted input.
        prepStatement = connection.prepareStatement("INSERT INTO " + id
                + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
        // Default parameters 1..23 to 0 so counters absent from this report are still bound.
        for (int i = 1; i < 24; i++) {
            prepStatement.setLong(i, 0);

        // Parameter 24 is the task id; parameter 25 is the task duration (defaulted to 0 first).
        prepStatement.setString(24, rep.getTaskID().toString());
        prepStatement.setLong(25, 0);

        // Only compute a duration when both timestamps are set; it is divided by 1000
        // (presumably milliseconds to seconds — confirm), while totalTime accumulates the raw difference.
        if (!(rep.getFinishTime() == 0) && !(rep.getStartTime() == 0)) {
            prepStatement.setLong(25, (rep.getFinishTime() - rep.getStartTime()) / 1000);
            totalTime += (rep.getFinishTime() - rep.getStartTime());
        } else {
            prepStatement.setLong(25, 0);

        // Bind each counter whose name has a column index registered in mapCounter.
        while (itrG.hasNext()) {
            Iterator<Counters.Counter> itrC = itrG.next().iterator();

            while (itrC.hasNext()) {
                Counters.Counter counter = itrC.next();
                if (mapCounter.get(counter.getName()) != null) {
                    prepStatement.setLong(mapCounter.get(counter.getName()), counter.getCounter());

From source file:org.apache.pig.backend.hadoop.executionengine.Launcher.java

License:Apache License

/**
 * Sums the run time (finish time minus start time) of all given task reports.
 *
 * NOTE(review): the method's closing brace is missing from this excerpt.
 *
 * @param taskReports the task reports to total
 * @return the sum of {@code getFinishTime() - getStartTime()} over all reports; 0 for an empty array
 */
protected long computeTimeSpent(TaskReport[] taskReports) {
    long timeSpent = 0;
    for (TaskReport r : taskReports) {
        timeSpent += (r.getFinishTime() - r.getStartTime());
    }
    return timeSpent;

From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java

License:Apache License

/**
 * Submit a Pig job to hadoop.
 *
 * @param mapFuncs
 *            a list of map functions to apply to the inputs. The cardinality of the list should
 *            be the same as input's cardinality.
 * @param groupFuncs
 *            a list of grouping functions to apply to the inputs. The cardinality of the list
 *            should be the same as input's cardinality.
 * @param reduceFunc
 *            the reduce function.
 * @param mapTasks
 *            the number of map tasks to use.
 * @param reduceTasks
 *            the number of reduce tasks to use.
 * @param input
 *            a list of inputs
 * @param output
 *            the path of the output.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    // Configures a Hadoop job from the given POMapreduce, submits it through the JobClient,
    // polls it until completion while logging progress, and returns whether it succeeded.
    // NOTE(review): this excerpt is heavily truncated — many closing braces, loop/branch
    // bodies and some statements are missing; treat the comments below as a reading aid only.
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    boolean success = false;
    // Passed to JarManager.createJar below; the loops that fill it have lost their bodies here.
    List<String> funcs = new ArrayList<String>();

    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
    if (pom.toReduce != null) {

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        // Fall back to the user name "Pigster" when the user.name system property is unset.
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");

        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));

        // Each non-null part of the plan is serialized into the job configuration.
        if (pom.reduceParallelism != -1) {
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that combiner is only called once
            // since we can't handle no combine or multiple combines
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        if (pom.toCombine != null) {
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
        if (pom.partitionFunction != null) {
        // not used starting with 0.15 conf.setInputKeyClass(Text.class);
        // not used starting with 0.15 conf.setInputValueClass(Tuple.class);
        if (pom.userComparator != null) {
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));

        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());

        // Now, actually submit the job (using the submit name)
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        // Polling state: the last* values let progress be logged only when it changes.
        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            // NOTE(review): the try body is missing in this excerpt (presumably a sleep of sleepTime — confirm).
            try {
            } catch (Exception e) {

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                if (success) {
                // Overall query progress = completed MR jobs / total MR jobs.
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                    lastQueryProgress = queryProgress;
            } else // still running
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                // The running job counts as the average of its map and reduce progress.
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                    lastQueryProgress = queryProgress;

        // bug 1030028: if the input file is empty; hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        // Drop everything from the first ':' in the output name, if present.
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));

        try {
            // NOTE(review): statement below is cut off in this excerpt.
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))

            if (success && !descriptor.exists()) {

                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            // On success, add the summed task run times (finish - start) of all map and
            // reduce tasks to totalHadoopTimeSpent.
            long timeSpent = 0;

            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            totalHadoopTimeSpent += timeSpent;
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        throw WrappedIOException.wrap(e);
    } finally {
    return success;