Example usage for org.apache.hadoop.mapred JobStatus getRunState

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobStatus getRunState.

Prototype

public synchronized int getRunState() 
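
Before the full examples, a minimal sketch (not taken from any project below) of how getRunState() is typically consumed: it returns one of the integer run-state constants defined on JobStatus (PREP, RUNNING, SUCCEEDED, FAILED, KILLED), which can be compared against those constants directly or rendered as text via the static helper JobStatus.getJobRunState(int). The JobTracker host and port here are placeholders.

import java.net.InetSocketAddress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobStatus;

public class RunStateExample {
    public static void main(String[] args) throws Exception {
        // Placeholder JobTracker address; replace with your cluster's.
        JobClient jc = new JobClient(new InetSocketAddress("jobtracker.example.com", 9001),
                new Configuration());
        for (JobStatus status : jc.getAllJobs()) {
            int state = status.getRunState();
            // The run state can be compared against the JobStatus int constants...
            if (state == JobStatus.FAILED || state == JobStatus.KILLED) {
                System.out.println(status.getJobID() + " did not succeed");
            }
            // ...or rendered as a human-readable string.
            System.out.println(status.getJobID() + ": " + JobStatus.getJobRunState(state));
        }
        jc.close();
    }
}

Several of the examples below follow the same pattern: enumerate jobs with JobClient.getAllJobs() and branch on the run state.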

Usage

From source file:com.ikanow.infinit.e.core.mapreduce.HadoopJobRunner.java

License:Open Source License

/**
 * Checks any running/queued jobs and updates their status if they've completed
 */
public void updateJobStatus() {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();
    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = getJobsToMakeComplete();
        while (cmr != null) {
            boolean markedComplete = false;
            //make sure it's an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) {
                if (null == jc) {
                    try {
                        jc = new JobClient(getJobClientConnection(), new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....                  
                        _logger.info(
                                "job_update_status_error_title=" + cmr.jobtitle + " job_update_status_error_id="
                                        + cmr._id.toString() + " job_update_status_error_message=Skipping job: "
                                        + cmr.jobidS + cmr.jobidN + ", this node does not run mapreduce");
                        setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in  /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        cmr = getJobsToMakeComplete();
                        continue;
                    }
                }

                //check if job is done, and update if it is               
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS)
                            && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly?";
                        }
                        setJobComplete(cmr, markedComplete, error, j.mapProgress(), j.reduceProgress(),
                                errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if it's been longer than 5 min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if it's been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #2.");
                    }
                }
            } else // this job hasn't been started yet:
            {
                //check if it's been longer than 5 min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if it's been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    setJobComplete(cmr, true, true, -1, -1, "Failed to launch job, unknown error #1.");
                }
            }
            //job was not done, need to set flag back
            if (!markedComplete) {
                incompleteJobsMap.put(cmr._id, cmr.jobidS);
            }
            cmr = getJobsToMakeComplete();
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + HarvestExceptionUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
    }

    //set all incomplete jobs back
    for (ObjectId id : incompleteJobsMap.keySet()) {
        BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, incompleteJobsMap.get(id));
        DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                new BasicDBObject(MongoDbManager.set_, update));
    }
}

From source file:com.ikanow.infinit.e.processing.custom.CustomProcessingController.java

License:Open Source License

public boolean checkRunningJobs(CustomMapReduceJobPojo jobOverride) {
    Map<ObjectId, String> incompleteJobsMap = new HashMap<ObjectId, String>();
    //get mongo entries that have jobids?
    try {
        JobClient jc = null;

        CustomMapReduceJobPojo cmr = jobOverride;
        if (null == cmr)
            cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
        else if (null == cmr.jobidS)
            return true;

        while (cmr != null) {
            boolean markedComplete = false;
            //make sure it's an actual ID, we now set jobidS to "" when running the job
            if (!cmr.jobidS.equals("")) // non null by construction
            {
                if (null == jc) {
                    try {
                        jc = new JobClient(InfiniteHadoopUtils.getJobClientConnection(prop_custom),
                                new Configuration());
                    } catch (Exception e) {
                        // Better delete this, no idea what's going on....                  
                        _logger.info(
                                "job_update_status_error_title=" + cmr.jobtitle + " job_update_status_error_id="
                                        + cmr._id.toString() + " job_update_status_error_message=Skipping job: "
                                        + cmr.jobidS + cmr.jobidN + ", this node does not run mapreduce");
                        _statusManager.setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error (check configuration in  /opt/hadoop-infinite/mapreduce/hadoop/, jobtracker may be localhost?).");
                        incompleteJobsMap.remove(cmr._id);
                        cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
                        continue;
                    }
                }

                //check if job is done, and update if it is               
                JobStatus[] jobs = jc.getAllJobs();
                boolean bFound = false;
                for (JobStatus j : jobs) {
                    if (j.getJobID().getJtIdentifier().equals(cmr.jobidS)
                            && j.getJobID().getId() == cmr.jobidN) {
                        bFound = true;
                        boolean error = false;
                        markedComplete = j.isJobComplete();
                        String errorMessage = null;
                        if (JobStatus.FAILED == j.getRunState()) {
                            markedComplete = true;
                            error = true;
                            errorMessage = "Job failed while running, check for errors in the mapper/reducer or that your key/value classes are set up correctly? "
                                    + j.getFailureInfo();
                        }
                        _statusManager.setJobComplete(cmr, markedComplete, error, j.mapProgress(),
                                j.reduceProgress(), errorMessage);
                        break; // (from mini loop over hadoop jobs, not main loop over infinite tasks)
                    }
                }
                if (!bFound) { // Possible error
                    //check if it's been longer than 5 min and mark job as complete (it failed to launch)
                    Date currDate = new Date();
                    Date lastDate = cmr.lastRunTime;
                    //if it's been more than 5 min (5m*60s*1000ms)
                    if (currDate.getTime() - lastDate.getTime() > 300000) {
                        markedComplete = true;
                        _statusManager.setJobComplete(cmr, true, true, -1, -1,
                                "Failed to launch job, unknown error #2.");
                    }
                }
            } else // this job hasn't been started yet:
            {
                //check if it's been longer than 5 min and mark job as complete (it failed to launch)
                Date currDate = new Date();
                Date lastDate = cmr.lastRunTime;
                //if it's been more than 5 min (5m*60s*1000ms)
                if (currDate.getTime() - lastDate.getTime() > 300000) {
                    markedComplete = true;
                    _statusManager.setJobComplete(cmr, true, true, -1, -1,
                            "Failed to launch job, unknown error #1.");
                }
            }
            //job was done, remove flag
            if (markedComplete) {
                incompleteJobsMap.remove(cmr._id);
            }
            if (null == jobOverride)
                cmr = CustomScheduleManager.getJobsToMakeComplete(_bHadoopEnabled, incompleteJobsMap);
            else
                cmr = null;
        }
    } catch (Exception ex) {
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(ex));
    } catch (Error err) {
        // Really really want to get to the next line of code, and clear the status...
        _logger.info("job_error_checking_status_message=" + InfiniteHadoopUtils.createExceptionMessage(err));
    }

    if (null == jobOverride) {
        //set all incomplete jobs' status back
        for (ObjectId id : incompleteJobsMap.keySet()) {
            BasicDBObject update = new BasicDBObject(CustomMapReduceJobPojo.jobidS_, incompleteJobsMap.get(id));
            DbManager.getCustom().getLookup().update(new BasicDBObject(CustomMapReduceJobPojo._id_, id),
                    new BasicDBObject(MongoDbManager.set_, update));
        }
    }
    return incompleteJobsMap.isEmpty();
}

From source file:com.impetus.ankush2.hadoop.monitor.JobStatusProvider.java

License:Open Source License

/**
 * Builds a report of status information for the given job.
 *
 * @param jobSts the status of the job to report on
 * @return a map of job status information
 * @throws IOException
 */
private Map<String, Object> getJobReport(JobStatus jobSts) throws IOException {
    // Creating an empty map for storing job information
    Map<String, Object> jobReport = new HashMap<String, Object>();
    // Returns the jobid of the Job
    org.apache.hadoop.mapred.JobID jobId = jobSts.getJobID();
    // Get a RunningJob object to track the ongoing Map-Reduce job.
    RunningJob job = jobClient.getJob(jobId);
    String jobName = "";
    if (job != null) {
        // Get the name of the job.
        jobName = job.getJobName();
    }
    // Percentage of progress in maps
    float mapProgress = jobSts.mapProgress() * 100;
    // Percentage of progress in reduce
    float reduceProgress = jobSts.reduceProgress() * 100;

    int mapTotal = 0;
    int reduceTotal = 0;
    int mapComp = 0;
    int reduceComp = 0;

    // Count for Map and Reduce Complete
    try {
        // Get the information of the current state of the map
        // tasks of a job
        TaskReport[] mapTaskReports = jobClient.getMapTaskReports(jobId);
        // Get the total map
        mapTotal = mapTaskReports.length;
        // Iterating over the map tasks
        for (TaskReport taskReport : mapTaskReports) {
            // The current state of a map TaskInProgress as seen
            // by the JobTracker.
            TIPStatus currentStatus = taskReport.getCurrentStatus();
            if (currentStatus == TIPStatus.COMPLETE) {
                mapComp++;
            }
        }

        // Get the information of the current state of the
        // reduce tasks of a job.
        TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobId);
        // Get the total reduce
        reduceTotal = reduceTaskReport.length;
        // Iterating over the reduce tasks
        for (TaskReport taskReport : reduceTaskReport) {
            // The current state of a reduce TaskInProgress as
            // seen by the JobTracker.
            TIPStatus currentStatus = taskReport.getCurrentStatus();
            if (currentStatus == TIPStatus.COMPLETE) {
                reduceComp++;
            }
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
    // Percentage of progress in setup
    float setupProgress = jobSts.setupProgress() * 100;
    // The progress made on cleanup
    float cleanupProgress = jobSts.cleanupProgress() * 100;
    // Get any available diagnostic information on why the job
    // might have failed.
    String failureInfo = jobSts.getFailureInfo();

    // Putting job status information in the map
    jobReport.put("jobId", jobId.toString());
    jobReport.put("jobName", jobName);
    jobReport.put("jobPriority", jobSts.getJobPriority().toString());
    jobReport.put("jobStartTime", jobSts.getStartTime());

    jobReport.put("userName", jobSts.getUsername());
    jobReport.put("jobComplete", jobSts.isJobComplete());

    jobReport.put("mapProgress", mapProgress);
    jobReport.put("reduceProgress", reduceProgress);

    jobReport.put("mapTotal", mapTotal);
    jobReport.put("reduceTotal", reduceTotal);
    jobReport.put("mapCompleted", mapComp);
    jobReport.put("reduceCompleted", reduceComp);

    jobReport.put("setupProgress", setupProgress);
    jobReport.put("cleanupProgress", cleanupProgress);

    jobReport.put("schedulingInfo", jobSts.getSchedulingInfo());
    jobReport.put("jobState", JobStatus.getJobRunState(jobSts.getRunState()));
    jobReport.put("failureInfo", failureInfo);
    jobReport.put("jobFile", job.getJobFile());
    jobReport.put("trackingURL", job.getTrackingURL());

    jobReport.putAll(getDetailedJobReport(jobId));
    return jobReport;
}

From source file:org.apache.hive.hcatalog.templeton.tool.LaunchMapper.java

License:Apache License

/**
 * Attempts to reconnect to an already running child job of the templeton launcher. This
 * is used in cases where the templeton launcher task has failed and is retried by the
 * MR framework. If reconnect to the child job is possible, the method will continue
 * tracking its progress until completion.
 * @return Returns true if reconnect was successful, false if not supported or
 *         no child jobs were found.
 */
private boolean tryReconnectToRunningJob(Configuration conf, Context context, LauncherDelegator.JobType jobType,
        String statusdir) throws IOException, InterruptedException {
    if (!reconnectToRunningJobEnabledAndSupported(conf, jobType)) {
        return false;
    }

    long startTime = getTempletonLaunchTime(conf);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    WebHCatJTShim tracker = ShimLoader.getHadoopShims().getWebHCatShim(conf, ugi);
    try {
        Set<String> childJobs = tracker.getJobs(context.getJobID().toString(), startTime);
        if (childJobs.size() == 0) {
            LOG.info("No child jobs found to reconnect with");
            return false;
        }

        if (childJobs.size() > 1) {
            LOG.warn(String.format("Found more than one child job to reconnect with: %s, skipping reconnect",
                    Arrays.toString(childJobs.toArray())));
            return false;
        }

        String childJobIdString = childJobs.iterator().next();
        org.apache.hadoop.mapred.JobID childJobId = org.apache.hadoop.mapred.JobID.forName(childJobIdString);
        LOG.info(String.format("Reconnecting to an existing job %s", childJobIdString));

        // Update job state with the childJob id
        updateJobStatePercentAndChildId(conf, context.getJobID().toString(), null, childJobIdString);

        do {
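            // Poll the child job until it completes, reporting progress so the launcher task stays alive.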
            org.apache.hadoop.mapred.JobStatus jobStatus = tracker.getJobStatus(childJobId);
            if (jobStatus.isJobComplete()) {
                LOG.info(String.format("Child job %s completed", childJobIdString));
                int exitCode = 0;
                if (jobStatus.getRunState() != org.apache.hadoop.mapred.JobStatus.SUCCEEDED) {
                    exitCode = 1;
                }
                updateJobStateToDoneAndWriteExitValue(conf, statusdir, context.getJobID().toString(), exitCode);
                break;
            }

            String percent = String.format("map %s%%, reduce %s%%", jobStatus.mapProgress() * 100,
                    jobStatus.reduceProgress() * 100);
            updateJobStatePercentAndChildId(conf, context.getJobID().toString(), percent, null);

            LOG.info("KeepAlive Heart beat");

            context.progress();
            Thread.sleep(POLL_JOBPROGRESS_MSEC);
        } while (true);

        // Reconnect was successful
        return true;
    } catch (IOException ex) {
        LOG.error("Exception encountered in tryReconnectToRunningJob", ex);
        throw ex;
    } finally {
        tracker.close();
    }
}

From source file:org.estado.core.JobStatusChecker.java

License:Apache License

public void checkStatus() {
    List<org.estado.spi.JobStatus> jobStatusList = new ArrayList<org.estado.spi.JobStatus>();

    try {
        Configuration conf = new Configuration();
        JobClient client = new JobClient(new JobConf(conf));
        JobStatus[] jobStatuses = client.getAllJobs();
        showFilter();

        int jobCount = 0;
        for (JobStatus jobStatus : jobStatuses) {
            Long lastTaskEndTime = 0L;
            TaskReport[] mapReports = client.getMapTaskReports(jobStatus.getJobID());
            for (TaskReport r : mapReports) {
                if (lastTaskEndTime < r.getFinishTime()) {
                    lastTaskEndTime = r.getFinishTime();
                }
            }
            TaskReport[] reduceReports = client.getReduceTaskReports(jobStatus.getJobID());
            for (TaskReport r : reduceReports) {
                if (lastTaskEndTime < r.getFinishTime()) {
                    lastTaskEndTime = r.getFinishTime();
                }
            }
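            // Setup and cleanup task reports are also fetched, though their results are not used below.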
            client.getSetupTaskReports(jobStatus.getJobID());
            client.getCleanupTaskReports(jobStatus.getJobID());

            String jobId = jobStatus.getJobID().toString();
            String jobName = client.getJob(jobStatus.getJobID()).getJobName();
            Long startTime = jobStatus.getStartTime();
            String user = jobStatus.getUsername();
            int mapProgress = (int) (jobStatus.mapProgress() * 100);
            int reduceProgress = (int) (jobStatus.reduceProgress() * 100);
            org.estado.spi.JobStatus jobStat = null;
            ++jobCount;

            int runState = jobStatus.getRunState();
            switch (runState) {
            case JobStatus.SUCCEEDED:
                if (filter.contains("s")) {
                    Long duration = lastTaskEndTime - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "completed");
                    ++sCount;
                }
                break;

            case JobStatus.RUNNING:
                if (filter.contains("r")) {
                    long duration = System.currentTimeMillis() - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "running");
                    ++rCount;
                }
                break;

            case JobStatus.FAILED:
                if (filter.contains("f")) {
                    long duration = lastTaskEndTime - jobStatus.getStartTime();
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "failed");
                    RunningJob job = client.getJob(jobStatus.getJobID());
                    jobStat.setJobTasks(getTaskDetails(job));
                    ++fCount;
                }
                break;

            case JobStatus.PREP:
                if (filter.contains("p")) {
                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, null, null,
                            null, 0, 0, "preparing");
                    ++pCount;
                }
                break;

            case JobStatus.KILLED:
                if (filter.contains("k")) {
                    long duration = lastTaskEndTime - jobStatus.getStartTime();

                    jobStat = new org.estado.spi.JobStatus(cluster, jobId, jobName, null, user, startTime,
                            lastTaskEndTime, duration, mapProgress, reduceProgress, "killed");

                    RunningJob job = client.getJob(jobStatus.getJobID());
                    jobStat.setJobTasks(getTaskDetails(job));
                    ++kCount;
                }
                break;
            }

            // jobStat stays null when the job's state is excluded by the filter;
            // skip it so the loops below don't hit a NullPointerException
            if (jobStat != null) {
                jobStatusList.add(jobStat);
            }
        }

        //get counters
        for (org.estado.spi.JobStatus jobStat : jobStatusList) {
            if (!jobStat.getStatus().equals("preparing")) {
                List<JobCounterGroup> counterGroups = getJobCounters(jobStat.getJobId());
                jobStat.setCounterGroups(counterGroups);

                //additional data from counters
                setJobInfo(jobStat);
            }
        }

        //publish to all consumers
        for (JobStatusConsumer consumer : consumers) {
            consumer.handle(jobStatusList);
        }

        showJobCounts();
    } catch (Exception ex) {
        System.out.println("Job status checker failed: " + ex.getMessage());
    }

}