List of usage examples for org.apache.hadoop.mapred.RunningJob#isComplete()
public boolean isComplete() throws IOException;
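Before the real-world examples below, here is a minimal sketch of the typical calling pattern: submit a JobConf through a JobClient, then poll isComplete() until the job reaches a terminal state and check isSuccessful(). The class name, job name, and one-second poll interval are placeholders for illustration, not taken from any of the projects listed below, and the mapper/reducer/input/output setup is omitted.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class IsCompletePollingSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        JobConf conf = new JobConf();
        conf.setJobName("is-complete-sketch"); // placeholder name; mapper/reducer/IO setup omitted

        JobClient jobClient = new JobClient(conf);
        RunningJob runningJob = jobClient.submitJob(conf);

        // isComplete() becomes true once the job reaches any terminal state
        // (succeeded, failed, or killed), so poll it and then check isSuccessful().
        while (!runningJob.isComplete()) {
            Thread.sleep(1000);
        }
        System.out.println("Job " + runningJob.getID() + " successful: " + runningJob.isSuccessful());
    }
}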
From source file:org.apache.oozie.command.wf.TestWorkflowActionKillXCommand.java
License:Apache License
public void testWfActionKillChildJob() throws Exception {
    String externalJobID = launchSleepJob(1000);
    String childId = launchSleepJob(1000000);

    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.KILLED, WorkflowInstance.Status.KILLED);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), externalJobID, "1",
            WorkflowAction.Status.KILLED, childId);

    new ActionKillXCommand(action.getId()).call();

    JobClient jobClient = createJobClient();
    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(60 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertEquals(mrJob.getJobState(), JobStatus.KILLED);
}
From source file:org.apache.oozie.service.TestRecoveryService.java
License:Apache License
/**
 * Tests functionality of the Recovery Service Runnable command.
 * <p>
 * Starts an action with USER_RETRY status, runs the recovery runnable, and ensures the state
 * changes to OK and the job completes successfully.
 *
 * @throws Exception
 */
public void testWorkflowActionRecoveryUserRetry() throws Exception {
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job1 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action1 = this.addRecordToWfActionTable(job1.getId(), "1", WorkflowAction.Status.USER_RETRY);

    WorkflowJobBean job2 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action2 = createWorkflowActionSetPending(job2.getId(), WorkflowAction.Status.USER_RETRY);
    // Default recovery created time is 7 days.
    action2.setCreatedTime(new Date(new Date().getTime() - 8 * RecoveryService.ONE_DAY_MILLISCONDS));
    WorkflowActionInsertJPAExecutor actionInsertCmd = new WorkflowActionInsertJPAExecutor(action2);
    jpaService.execute(actionInsertCmd);

    Runnable recoveryRunnable = new RecoveryRunnable(0, 60, 60);
    recoveryRunnable.run();
    sleep(3000);

    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action1.getId());
    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            WorkflowActionBean a = jpaService.execute(wfActionGetCmd);
            return a.getExternalId() != null;
        }
    });
    action1 = jpaService.execute(wfActionGetCmd);
    assertNotNull(action1.getExternalId());
    assertEquals(WorkflowAction.Status.RUNNING, action1.getStatus());

    // Action 2 should not get recovered because its created time is older than 7 days.
    action2 = WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, action2.getId());
    assertNull(action2.getExternalId());
    assertEquals(WorkflowAction.Status.USER_RETRY, action2.getStatus());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job1, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    String group = conf.get("group.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action1.getExternalId();
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());

    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java
License:Apache License
/**
 * Submit a Pig job to hadoop.
 *
 * @param pom
 *            the POMapreduce plan describing the map functions, grouping functions, reduce
 *            function, inputs, and output path for this job.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;

    List<String> funcs = new ArrayList<String>();
    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");
        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));
        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that the combiner is only called once,
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }
        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15
        conf.setInputKeyClass(Text.class);
        // not used starting with 0.15
        conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));
        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }

        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else {
                // still running
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }

        // bug 1030028: if the input file is empty, hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }

        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))
                    .asElement(outputFile.toString());
            if (success && !descriptor.exists()) {
                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;

            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*
            final AtomicInteger taskCount = new AtomicInteger(0);
            final AtomicInteger successCount = new AtomicInteger(0);
            final AtomicInteger failedCount = new AtomicInteger(0);
            */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));

            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                // taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") } : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                // successCount.incrementAndGet();
                                // taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            // logError(ignored.getMessage());
                            // failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /*
            if (blocking) {
                while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                }

                if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                } else {
                    // we can't really know at this stage if
                    // the hadoop job will finish successfully
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                }
            } else {
            */

            // non-blocking - just set success equal to no failures arising
            // from invocation
            // result.setResult(failedCount.get() == 0);
            // result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }
            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);
            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS
                        + "'. Setting number of map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS
                        + "'. Setting number of reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS
                        + "'. Setting logging interval to 60");
            }

            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map"));
    conf.setCombinerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}
From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");

    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file:org.pig.oink.operation.impl.PigJobServerImpl.java
License:Apache License
@Override
public boolean cancelRequest(String requestId) throws IOException, Exception {
    PigRequestStats stats = null;
    try {
        stats = getRequestStats(requestId);
    } catch (IllegalArgumentException e) {
        logger.error("Invalid request ID", e);
        throw new IllegalArgumentException("Invalid request ID");
    } catch (Exception e) {
        logger.error("Unable to get list of jobs", e);
        throw new IOException("Unable to get list of jobs");
    }

    if (stats.getStatus().equals(Status.SUBMITTED.toString())) {
        List<String> jobs = stats.getJobs();
        for (String job : jobs) {
            job = job.substring(JT_UI.length());
            JobConf jobConf = new JobConf();
            jobConf.set("fs.default.name", PropertyLoader.getInstance().getProperty("fs.default.name"));
            jobConf.set("mapred.job.tracker", PropertyLoader.getInstance().getProperty("jobtracker"));
            try {
                JobClient jobClient = new JobClient(jobConf);
                RunningJob rJob = jobClient.getJob(JobID.forName(job));

                if (!rJob.isComplete()) {
                    rJob.killJob();
                }
            } catch (Exception e) {
                throw new Exception("Unable to kill job " + job);
            }
        }
        PigRequestStats requestStats = new PigRequestStats(0, 0, null, jobs.size());
        requestStats.setJobs(jobs);
        requestStats.setStatus(Status.KILLED.toString());
        Path statsPath = new Path(
                PropertyLoader.getInstance().getProperty(Constants.REQUEST_PATH) + requestId + "/stats");
        PigUtils.writeStatsFile(statsPath, requestStats);
        return true;
    } else {
        return false;
    }
}
From source file:org.pooledtimeseries.healthcheck.CheckCartesianProductSeqFile.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long start = System.currentTimeMillis();

    JobConf conf = new JobConf("Cartesian Product");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
        System.exit(1);
    }

    // Configure the join type
    conf.setJarByClass(CheckCartesianProductSeqFile.class);

    conf.setMapperClass(CartesianMapper.class);
    conf.setReducerClass(CartesianReducer.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

    TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }

    long finish = System.currentTimeMillis();
    System.out.println("Time in ms: " + (finish - start));

    System.exit(job.isSuccessful() ? 0 : 2);
}
From source file:setest.FormatStorageMR.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("FormatStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);
    conf.setJobName("FormatStorageMR");

    conf.setNumMapTasks(1);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageTestMapper.class);
    conf.setReducerClass(FormatStorageTestReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.setOutputFormat(FormatStorageOutputFormat.class);
    conf.set("mapred.output.compress", "false");

    Head head = new Head();
    initHead(head);
    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = null;
    rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);
}