List of usage examples for org.apache.hadoop.mapred.jobcontrol.Job#getAssignedJobID
public JobID getAssignedJobID()
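All of the examples below rely on the same basic pattern: a mapred Job is handed to a JobControl, the JobControl runs on its own thread, and getAssignedJobID() returns null until that thread has actually submitted the job to Hadoop. A minimal, self-contained sketch of that pattern follows; the polling loop and the class name AssignedJobIdExample are illustrative assumptions, not code taken from the projects below.

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.JobID;
    import org.apache.hadoop.mapred.jobcontrol.Job;
    import org.apache.hadoop.mapred.jobcontrol.JobControl;

    public class AssignedJobIdExample {
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf();           // job configuration would be filled in here
            Job job = new Job(conf);                // jobcontrol wrapper around the JobConf
            JobControl jc = new JobControl("example-group");
            jc.addJob(job);

            new Thread(jc, "JobControl").start();   // JobControl submits its jobs asynchronously

            while (!jc.allFinished()) {
                JobID assigned = job.getAssignedJobID();
                if (assigned != null) {             // null until the job is actually submitted
                    System.out.println("Hadoop job id: " + assigned);
                }
                Thread.sleep(500);
            }
            jc.stop();
        }
    }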
From source file:com.ebay.erl.mobius.core.MobiusJobRunner.java
License:Apache License
    private static String jobToString(Job aJob) {
        StringBuffer sb = new StringBuffer();
        sb.append("job mapred id:\t")
                .append(aJob.getAssignedJobID() == null ? "unassigned" : aJob.getAssignedJobID().toString())
                .append("\t");
        sb.append("job name: ").append(aJob.getJobName()).append("\n");
        String state = "Unset";
        switch (aJob.getState()) {
        case Job.DEPENDENT_FAILED:
            state = "DEPENDENT_FAILED";
            break;
        case Job.FAILED:
            state = "FAILED";
            break;
        case Job.READY:
            state = "READY";
            break;
        case Job.RUNNING:
            state = "RUNNING";
            break;
        case Job.SUCCESS:
            state = "SUCCESS";
            break;
        case Job.WAITING:
            state = "WAITING";
            break;
        }
        sb.append("job state:\t").append(state).append("\n");
        sb.append("job id:\t").append(aJob.getJobID()).append("\n");
        sb.append("job message:\t").append(aJob.getMessage()).append("\n");

        // Commented out on March 30, 2012, as an NPE is thrown on Apollo.
        //
        // if (aJob.getDependingJobs() == null || aJob.getDependingJobs().size() == 0) {
        //     sb.append("job has no depending job:\t").append("\n");
        // } else {
        //     sb.append("job has ").append(aJob.getDependingJobs().size()).append(" depending jobs:\n");
        //     for (int i = 0; i < aJob.getDependingJobs().size(); i++) {
        //         sb.append("\t depending job ").append(i).append(":\t");
        //         sb.append((aJob.getDependingJobs().get(i)).getJobName()).append("\n");
        //     }
        // }
        return sb.toString().trim();
    }
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezLauncher.java
License:Apache License
    @SuppressWarnings("deprecation")
    void computeWarningAggregate(Job job, JobClient jobClient, Map<Enum, Long> aggMap) {
        JobID mapRedJobID = job.getAssignedJobID();
        RunningJob runningJob = null;
        try {
            runningJob = jobClient.getJob(mapRedJobID);
            if (runningJob != null) {
                Counters counters = runningJob.getCounters();
                if (counters == null) {
                    long nullCounterCount = aggMap.get(PigWarning.NULL_COUNTER_COUNT) == null ? 0
                            : aggMap.get(PigWarning.NULL_COUNTER_COUNT);
                    nullCounterCount++;
                    aggMap.put(PigWarning.NULL_COUNTER_COUNT, nullCounterCount);
                }
                try {
                    for (Enum e : PigWarning.values()) {
                        if (e != PigWarning.NULL_COUNTER_COUNT) {
                            Long currentCount = aggMap.get(e);
                            currentCount = (currentCount == null ? 0 : currentCount);
                            // This code checks whether the counters object is null; if it
                            // is, we need to report to the user that the number of warning
                            // aggregations may not be correct. In fact, Counters should not
                            // be null; it is a Hadoop bug. Once that bug is fixed in Hadoop,
                            // the null-handling code should never be hit. See PIG-943.
                            if (counters != null)
                                currentCount += counters.getCounter(e);
                            aggMap.put(e, currentCount);
                        }
                    }
                } catch (Exception e) {
                    log.warn("Exception getting counters.", e);
                }
            }
        } catch (IOException ioe) {
            String msg = "Unable to retrieve job to compute warning aggregation.";
            log.warn(msg);
        }
    }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.Launcher.java
License:Apache License
    protected void getStats(Job job, JobClient jobClient, boolean errNotDbg, PigContext pigContext)
            throws Exception {
        JobID MRJobID = job.getAssignedJobID();
        String jobMessage = job.getMessage();
        Exception backendException = null;
        if (MRJobID == null) {
            try {
                LogUtils.writeLog("Backend error message during job submission", jobMessage,
                        pigContext.getProperties().getProperty("pig.logfile"), log);
                backendException = getExceptionFromString(jobMessage);
            } catch (Exception e) {
                int errCode = 2997;
                String msg = "Unable to recreate exception from backend error: " + jobMessage;
                throw new ExecException(msg, errCode, PigException.BUG);
            }
            throw backendException;
        }
        try {
            TaskReport[] mapRep = jobClient.getMapTaskReports(MRJobID);
            getErrorMessages(mapRep, "map", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(mapRep);
            mapRep = null;
            TaskReport[] redRep = jobClient.getReduceTaskReports(MRJobID);
            getErrorMessages(redRep, "reduce", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(redRep);
            redRep = null;
        } catch (IOException e) {
            if (job.getState() == Job.SUCCESS) {
                // If the job succeeded, let the user know that
                // we were unable to get statistics.
                log.warn("Unable to get job related diagnostics");
            } else {
                throw e;
            }
        }
    }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.Launcher.java
License:Apache License
    /**
     * Returns the progress of a Job j which is part of a submitted
     * JobControl object. The progress is for this Job alone, so it has to
     * be scaled down by the number of jobs that are present in the
     * JobControl.
     *
     * @param j the Job for which progress is required
     * @param jobClient the JobClient to which it has been submitted
     * @return the percentage progress of this Job
     * @throws IOException
     */
    protected double progressOfRunningJob(Job j, JobClient jobClient) throws IOException {
        JobID mrJobID = j.getAssignedJobID();
        RunningJob rj = jobClient.getJob(mrJobID);
        if (rj == null && j.getState() == Job.SUCCESS)
            return 1;
        else if (rj == null)
            return 0;
        else {
            double mapProg = rj.mapProgress();
            double redProg = rj.reduceProgress();
            return (mapProg + redProg) / 2;
        }
    }
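The Javadoc above notes that a single job's progress must be scaled down by the number of jobs in the JobControl. A hedged sketch of what that aggregation could look like follows; progressOfJobControl is a hypothetical helper built on progressOfRunningJob, and Pig's actual calculateProgress differs in its details.

    // A sketch (an assumption, not Pig's exact implementation): completed jobs
    // count as 1.0 each, running jobs contribute their fractional progress, and
    // the sum is divided by the total number of jobs in the JobControl.
    protected double progressOfJobControl(JobControl jc, JobClient jobClient) throws IOException {
        double prog = jc.getSuccessfulJobs().size();
        for (Object o : jc.getRunningJobs()) {
            prog += progressOfRunningJob((Job) o, jobClient);
        }
        int total = jc.getSuccessfulJobs().size() + jc.getRunningJobs().size() + jc.getReadyJobs().size()
                + jc.getWaitingJobs().size() + jc.getFailedJobs().size();
        return total == 0 ? 0 : prog / total;
    }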
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.java
License:Apache License
    @Override
    public void kill() {
        try {
            log.debug("Receive kill signal");
            if (jc != null) {
                for (Job job : jc.getRunningJobs()) {
                    HadoopShims.killJob(job);
                    log.info("Job " + job.getAssignedJobID() + " killed");
                }
            }
        } catch (Exception e) {
            log.warn("Encounter exception on cleanup:" + e);
        }
    }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.java
License:Apache License
    @Override
    public PigStats launchPig(PhysicalPlan php, String grpName, PigContext pc) throws PlanException,
            VisitorException, IOException, ExecException, JobCreationException, Exception {
        long sleepTime = 500;
        aggregateWarning = Boolean.valueOf(pc.getProperties().getProperty("aggregate.warning"));
        MROperPlan mrp = compile(php, pc);

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());

        MRExecutionEngine exe = (MRExecutionEngine) pc.getExecutionEngine();
        Properties defaultProperties = new Properties();
        JobConf defaultJobConf = exe.getLocalConf();
        Utils.recomputeProperties(defaultJobConf, defaultProperties);

        // This is a generic JobClient for checking progress of the jobs
        JobClient statsJobClient = new JobClient(exe.getJobConf());

        JobControlCompiler jcc = new JobControlCompiler(pc, conf,
                ConfigurationUtil.toConfiguration(defaultProperties));

        MRScriptState.get().addWorkflowAdjacenciesToConf(mrp, conf);

        // start collecting statistics
        PigStats.start(pc.getExecutionEngine().instantiatePigStats());
        MRPigStatsUtil.startCollection(pc, statsJobClient, jcc, mrp);

        // Find all the intermediate data stores. The plan will be destroyed during
        // compile/execution, so this needs to be done beforehand.
        MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
        intermediateVisitor.visit();

        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
        long scriptSubmittedTimestamp = System.currentTimeMillis();

        // create the exception handler for the job control thread
        // and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();

        boolean stop_on_failure = Boolean.valueOf(pc.getProperties().getProperty("stop.on.failure", "false"));

        // jc is null only when mrp.size == 0
        while (mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if (jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());

                // run the native mapreduce roots first, then run the rest of the roots
                for (MapReduceOper mro : roots) {
                    if (mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper) mro;
                        try {
                            MRScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {
                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);

                            String msg = "Error running native mapreduce" + " operator job :" + natOp.getJobId()
                                    + e.getMessage();

                            String stackTrace = Utils.getStackStraceStr(e);
                            LogUtils.writeLog(msg, stackTrace, pc.getProperties().getProperty("pig.logfile"), log);
                            log.info(msg);

                            if (stop_on_failure) {
                                int errCode = 6017;
                                throw new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT);
                            }
                        }
                        double prog = ((double) numMRJobsCompl) / totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }

            // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() + " map-reduce job(s) waiting for submission.");

            // notify listeners about jobs submitted
            MRScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());

            // update Pig stats' job DAG with the just-compiled jobs
            MRPigStatsUtil.updateJobMroMap(jcc.getJobMroMap());

            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get(MRConfiguration.JOB_TRACKER_HTTP_ADDRESS);
                String jobTrackerAdd = jobConf.get(MRConfiguration.JOB_TRACKER);
                jobTrackerLoc = jobTrackerAdd.substring(0, jobTrackerAdd.indexOf(":"))
                        + port.substring(port.indexOf(":"));
            } catch (Exception e) {
                // Could not get the job tracker location; most probably we are running
                // in local mode. If so, we don't print out the job tracker location,
                // because it is meaningless for local mode.
                jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }

            completeFailedJobsInThisRun.clear();

            // Set the thread UDFContext so registered classes are available.
            final UDFContext udfContext = UDFContext.getUDFContext();
            Thread jcThread = new Thread(jc, "JobControl") {
                @Override
                public void run() {
                    UDFContext.setUdfContext(udfContext.clone()); // PIG-2576
                    super.run();
                }
            };

            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
            jcThread.setContextClassLoader(PigContext.getClassLoader());

            // mark the times that the jobs were submitted so it's reflected in job history props
            for (Job job : jc.getWaitingJobs()) {
                JobConf jobConfCopy = job.getJobConf();
                jobConfCopy.set("pig.script.submitted.timestamp", Long.toString(scriptSubmittedTimestamp));
                jobConfCopy.set("pig.job.submitted.timestamp", Long.toString(System.currentTimeMillis()));
                job.setJobConf(jobConfCopy);
            }

            // All the setup is done; now let's launch the jobs.
            jcThread.start();

            try {
                // a flag for whether to warn about failure during the loop below,
                // so users can notice failure earlier
                boolean warn_failure = true;

                // Now wait till we are finished.
                while (!jc.allFinished()) {
                    try {
                        jcThread.join(sleepTime);
                    } catch (InterruptedException e) {
                    }

                    List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();

                    for (Job job : jobsWithoutIds) {
                        if (job.getAssignedJobID() != null) {
                            jobsAssignedIdInThisRun.add(job);
                            log.info("HadoopJobId: " + job.getAssignedJobID());

                            // display the aliases being processed
                            MapReduceOper mro = jcc.getJobMroMap().get(job);
                            if (mro != null) {
                                String alias = MRScriptState.get().getAlias(mro);
                                log.info("Processing aliases " + alias);
                                String aliasLocation = MRScriptState.get().getAliasLocation(mro);
                                log.info("detailed locations: " + aliasLocation);
                            }

                            if (!HadoopShims.isHadoopYARN() && jobTrackerLoc != null) {
                                log.info("More information at: http://" + jobTrackerLoc
                                        + "/jobdetails.jsp?jobid=" + job.getAssignedJobID());
                            }

                            // update statistics for this job so jobId is set
                            MRPigStatsUtil.addJobStats(job);
                            MRScriptState.get().emitJobStartedNotification(job.getAssignedJobID().toString());
                        } else {
                            // This job has not been assigned an id yet.
                        }
                    }
                    jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

                    double prog = (numMRJobsCompl + calculateProgress(jc)) / totalMRJobs;
                    if (notifyProgress(prog, lastProg)) {
                        List<Job> runnJobs = jc.getRunningJobs();
                        if (runnJobs != null) {
                            StringBuilder msg = new StringBuilder();
                            for (Object object : runnJobs) {
                                Job j = (Job) object;
                                if (j != null) {
                                    msg.append(j.getAssignedJobID()).append(",");
                                }
                            }
                            if (msg.length() > 0) {
                                msg.setCharAt(msg.length() - 1, ']');
                                log.info("Running jobs are [" + msg);
                            }
                        }
                        lastProg = prog;
                    }

                    // collect job stats by frequently polling completed jobs (PIG-1829)
                    MRPigStatsUtil.accumulateStats(jc);

                    // if stop_on_failure is enabled, we need to stop immediately when any job has failed
                    checkStopOnFailure(stop_on_failure);
                    // otherwise, we just display a warning message if there's any failure
                    if (warn_failure && !jc.getFailedJobs().isEmpty()) {
                        // we don't warn again for this group of jobs
                        warn_failure = false;
                        log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                                + "want Pig to stop immediately on failure.");
                    }
                }

                // Check for the jobControlException first.
                // If the job controller fails before launching the jobs, then there are
                // no jobs to check for failure.
                if (jobControlException != null) {
                    if (jobControlException instanceof PigException) {
                        if (jobControlExceptionStackTrace != null) {
                            LogUtils.writeLog("Error message from job controller", jobControlExceptionStackTrace,
                                    pc.getProperties().getProperty("pig.logfile"), log);
                        }
                        throw jobControlException;
                    } else {
                        int errCode = 2117;
                        String msg = "Unexpected error when launching map reduce job.";
                        throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
                    }
                }

                if (!jc.getFailedJobs().isEmpty()) {
                    // stop if stop_on_failure is enabled
                    checkStopOnFailure(stop_on_failure);

                    // If we only have one store and that job fails, then we know the job
                    // failed completely, and we shall stop dependent jobs.
                    for (Job job : jc.getFailedJobs()) {
                        completeFailedJobsInThisRun.add(job);
                        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                    }
                    failedJobs.addAll(jc.getFailedJobs());
                }

                int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
                numMRJobsCompl += removedMROp;

                List<Job> jobs = jc.getSuccessfulJobs();
                jcc.moveResults(jobs);
                succJobs.addAll(jobs);

                // collecting final statistics
                MRPigStatsUtil.accumulateStats(jc);
            } catch (Exception e) {
                throw e;
            } finally {
                jc.stop();
            }
        }

        MRScriptState.get().emitProgressUpdatedNotification(100);
        log.info("100% complete");

        boolean failed = false;

        if (failedNativeMR.size() > 0) {
            failed = true;
        }

        if (Boolean.valueOf(pc.getProperties().getProperty(PigConfiguration.PIG_DELETE_TEMP_FILE, "true"))) {
            // Clean up all the intermediate data
            for (String path : intermediateVisitor.getIntermediate()) {
                // Skip non-filesystem paths such as hbase, see PIG-3617
                if (HadoopShims.hasFileSystemImpl(new Path(path), conf)) {
                    FileLocalizer.delete(path, pc);
                }
            }
        }

        // Look to see if any jobs failed. If so, we need to report that.
        if (failedJobs != null && failedJobs.size() > 0) {
            Exception backendException = null;
            for (Job fj : failedJobs) {
                try {
                    getStats(fj, true, pc);
                } catch (Exception e) {
                    backendException = e;
                }
                List<POStore> sts = jcc.getStores(fj);
                for (POStore st : sts) {
                    failureMap.put(st.getSFile(), backendException);
                }
                MRPigStatsUtil.setBackendException(fj, backendException);
            }
            failed = true;
        }

        // stats collection is done; log the results
        MRPigStatsUtil.stopCollection(true);

        // PigStatsUtil.stopCollection also computes the return code based on
        // total jobs to run, jobs successful, and jobs failed
        failed = failed || !PigStats.get().isSuccessful();

        Map<Enum, Long> warningAggMap = new HashMap<Enum, Long>();

        if (succJobs != null) {
            for (Job job : succJobs) {
                List<POStore> sts = jcc.getStores(job);
                for (POStore st : sts) {
                    if (Utils.isLocal(pc, job.getJobConf())) {
                        HadoopShims.storeSchemaForLocal(job, st);
                    }

                    if (!st.isTmpStore()) {
                        // create an "_SUCCESS" file in the output location if the
                        // output location is a filesystem dir
                        createSuccessFile(job, st);
                    } else {
                        log.debug("Successfully stored result in: \"" + st.getSFile().getFileName() + "\"");
                    }
                }

                getStats(job, false, pc);
                if (aggregateWarning) {
                    computeWarningAggregate(job, warningAggMap);
                }
            }
        }

        if (aggregateWarning) {
            CompilationMessageCollector.logAggregate(warningAggMap, MessageType.Warning, log);
        }

        if (!failed) {
            log.info("Success!");
        } else {
            if (succJobs != null && succJobs.size() > 0) {
                log.info("Some jobs have failed! Stop running all dependent jobs");
            } else {
                log.info("Failed!");
            }
        }

        jcc.reset();

        int ret = failed ? ((succJobs != null && succJobs.size() > 0) ? ReturnCode.PARTIAL_FAILURE
                : ReturnCode.FAILURE) : ReturnCode.SUCCESS;

        PigStats pigStats = PigStatsUtil.getPigStats(ret);
        // run cleanup for all of the stores
        for (OutputStats output : pigStats.getOutputStats()) {
            POStore store = output.getPOStore();
            try {
                if (!output.isSuccessful()) {
                    store.getStoreFunc().cleanupOnFailure(store.getSFile().getFileName(),
                            new org.apache.hadoop.mapreduce.Job(output.getConf()));
                } else {
                    store.getStoreFunc().cleanupOnSuccess(store.getSFile().getFileName(),
                            new org.apache.hadoop.mapreduce.Job(output.getConf()));
                }
            } catch (IOException e) {
                throw new ExecException(e);
            } catch (AbstractMethodError nsme) {
                // Just swallow it. This means we're running against an
                // older instance of a StoreFunc that doesn't implement
                // this method.
            }
        }
        return pigStats;
    }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.java
License:Apache License
    /**
     * If stop_on_failure is enabled and any job has failed, an ExecException is thrown.
     * @param stop_on_failure whether it's enabled
     * @throws ExecException if stop_on_failure is enabled and any job has failed
     */
    private void checkStopOnFailure(boolean stop_on_failure) throws ExecException {
        if (jc.getFailedJobs().isEmpty())
            return;

        if (stop_on_failure) {
            int errCode = 6017;
            StringBuilder msg = new StringBuilder();

            for (int i = 0; i < jc.getFailedJobs().size(); i++) {
                Job j = jc.getFailedJobs().get(i);
                msg.append("JobID: " + j.getAssignedJobID() + " Reason: " + j.getMessage());
                if (i != jc.getFailedJobs().size() - 1) {
                    msg.append("\n");
                }
            }

            throw new ExecException(msg.toString(), errCode, PigException.REMOTE_ENVIRONMENT);
        }
    }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.java
License:Apache License
    private void getStats(Job job, boolean errNotDbg, PigContext pigContext) throws ExecException {
        JobID MRJobID = job.getAssignedJobID();
        String jobMessage = job.getMessage();
        Exception backendException = null;
        if (MRJobID == null) {
            try {
                LogUtils.writeLog("Backend error message during job submission", jobMessage,
                        pigContext.getProperties().getProperty("pig.logfile"), log);
                backendException = getExceptionFromString(jobMessage);
            } catch (Exception e) {
                int errCode = 2997;
                String msg = "Unable to recreate exception from backend error: " + jobMessage;
                throw new ExecException(msg, errCode, PigException.BUG);
            }
            throw new ExecException(backendException);
        }
        try {
            TaskReport[] mapRep = HadoopShims.getTaskReports(job, TaskType.MAP);
            if (mapRep != null) {
                getErrorMessages(mapRep, "map", errNotDbg, pigContext);
                totalHadoopTimeSpent += computeTimeSpent(mapRep);
                mapRep = null;
            }
            TaskReport[] redRep = HadoopShims.getTaskReports(job, TaskType.REDUCE);
            if (redRep != null) {
                getErrorMessages(redRep, "reduce", errNotDbg, pigContext);
                totalHadoopTimeSpent += computeTimeSpent(redRep);
                redRep = null;
            }
        } catch (IOException e) {
            if (job.getState() == Job.SUCCESS) {
                // If the job succeeded, let the user know that
                // we were unable to get statistics.
                log.warn("Unable to get job related diagnostics");
            } else {
                throw new ExecException(e);
            }
        } catch (Exception e) {
            throw new ExecException(e);
        }
    }
From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java
License:Apache License
    public static TaskReport[] getTaskReports(Job job, TaskType type) throws IOException {
        if (job.getJobConf().getBoolean(PigConfiguration.PIG_NO_TASK_REPORT, false)) {
            LOG.info("TaskReports are disabled for job: " + job.getAssignedJobID());
            return null;
        }
        org.apache.hadoop.mapreduce.Job mrJob = job.getJob();
        try {
            org.apache.hadoop.mapreduce.TaskReport[] reports = mrJob.getTaskReports(type);
            return DowngradeHelper.downgradeTaskReports(reports);
        } catch (InterruptedException ir) {
            throw new IOException(ir);
        }
    }
From source file:org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil.java
License:Apache License
    /**
     * Returns the count for the given counter name in the counter group
     * 'MultiStoreCounters'.
     *
     * @param job the MR job
     * @param jobClient the Hadoop job client
     * @param counterName the counter name
     * @return the count of the given counter name
     */
    public static long getMultiStoreCount(Job job, JobClient jobClient, String counterName) {
        long value = -1;
        try {
            RunningJob rj = jobClient.getJob(job.getAssignedJobID());
            if (rj != null) {
                Counters.Counter counter = rj.getCounters().getGroup(MULTI_STORE_COUNTER_GROUP)
                        .getCounterForName(counterName);
                value = counter.getValue();
            }
        } catch (IOException e) {
            LOG.warn("Failed to get the counter for " + counterName, e);
        }
        return value;
    }
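A caller might read one of these counters once the job has completed, along the lines of the following sketch; the counter name "Records Written" is a hypothetical placeholder, since the real multi-store counter names are generated per store.

    // Hypothetical usage: "Records Written" stands in for a store-specific counter
    // name and is not a real Pig constant. A negative return value means the
    // counter could not be retrieved.
    long count = MRPigStatsUtil.getMultiStoreCount(job, jobClient, "Records Written");
    if (count >= 0) {
        LOG.info("Multi-store counter value: " + count);
    }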