Usage examples for org.apache.hadoop.mapred.RunningJob#isSuccessful()
public boolean isSuccessful() throws IOException;
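Before the project-specific examples below, here is a minimal, self-contained sketch of the typical call pattern (not taken from any of the listed sources): submit a job through the old org.apache.hadoop.mapred API, poll isComplete(), then use isSuccessful() to tell a SUCCEEDED job from a FAILED or KILLED one. The class name, job name, and input/output paths are placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class IsSuccessfulExample {
    public static void main(String[] args) throws IOException, InterruptedException {
        JobConf conf = new JobConf(IsSuccessfulExample.class);
        conf.setJobName("is-successful-example");                // placeholder job name
        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // placeholder input path
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // placeholder output path

        JobClient jobClient = new JobClient(conf);
        RunningJob job = jobClient.submitJob(conf);

        // Poll until the job reaches a terminal state; isComplete() is true for
        // succeeded as well as failed/killed jobs.
        while (!job.isComplete()) {
            Thread.sleep(1000);
        }

        // isSuccessful() distinguishes SUCCEEDED from FAILED/KILLED.
        if (job.isSuccessful()) {
            System.out.println("Job " + job.getID() + " succeeded");
        } else {
            System.out.println("Job " + job.getID() + " failed");
        }
    }
}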
From source file:org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java
License:Apache License
public void testSqoopEval() throws Exception {
    createDB();
    Context context = createContext(getActionXmlEval());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    assertEquals(0, context.getAction().getExternalChildIDs().length());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertTrue(hadoopCounters.isEmpty());
    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertEquals(0, outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length());
}
From source file:org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java
License:Apache License
public void testSqoopActionFreeFormQuery() throws Exception {
    createDB();
    Context context = createContext(getActionXmlFreeFromQuery());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));
    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertFalse(hadoopCounters.isEmpty());
    FileSystem fs = getFileSystem();
    FileStatus[] parts = fs.listStatus(new Path(getSqoopOutputDir()), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part-");
        }
    });
    int count = 0;
    for (FileStatus part : parts) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(part.getPath())));
        String line = br.readLine();
        while (line != null) {
            assertTrue(line.contains("a"));
            count++;
            line = br.readLine();
        }
        br.close();
    }
    assertEquals(3, count);
    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
}
From source file:org.apache.oozie.command.wf.TestActionCheckXCommand.java
License:Apache License
public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());
    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    String launcherId = action.getExternalId();
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();
    assertTrue(launcherId.equals(mapperId));
    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    assertEquals("SUCCEEDED", action.getExternalStatus());
}
From source file:org.apache.oozie.command.wf.TestActionCheckXCommand.java
License:Apache License
public void testActionCheckTransientDuringLauncher() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(), new String[] { "org.apache.oozie.service.ActionCheckerService" });
    services.init();
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);
    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();
    // At this point, the launcher job has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the launcher job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {
        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();
            waitFor(30 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());
            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());
            // At this point, the action has gotten a transient error, even after maxRetries tries,
            // so the workflow has been SUSPENDED
        }
    });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job2, action1, false, false);
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action2.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();
    assertFalse(originalLauncherId.equals(launcherId));
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalId();
    String childId = action4.getExternalChildIDs();
    assertTrue(launcherId.equals(mapperId));
    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);
    assertEquals("SUCCEEDED", action5.getExternalStatus());
}
From source file:org.apache.oozie.command.wf.TestActionCheckXCommand.java
License:Apache License
public void testActionCheckTransientDuringMRAction() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(), new String[] { "org.apache.oozie.service.ActionCheckerService" });
    services.init();
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);
    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job0, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(originalLauncherId));
    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    new ActionCheckXCommand(action1.getId()).call();
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    String originalMapperId = action2.getExternalChildIDs();
    assertFalse(originalLauncherId.equals(originalMapperId));
    // At this point, the launcher job has finished and the map-reduce action has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the map-reduce job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {
        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();
            waitFor(30 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());
            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());
            // At this point, the action has gotten a transient error, even after maxRetries tries,
            // so the workflow has been SUSPENDED
        }
    });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());
    sleep(500);
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();
    assertFalse(originalLauncherId.equals(launcherId));
    final RunningJob launcherJob2 = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob2.isComplete();
        }
    });
    assertTrue(launcherJob2.isSuccessful());
    actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalChildIDs();
    assertFalse(originalMapperId.equals(mapperId));
    final RunningJob mrJob = jobClient.getJob(JobID.forName(mapperId));
    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);
    assertEquals("SUCCEEDED", action5.getExternalStatus());
}
From source file:org.apache.oozie.command.wf.TestActionStartXCommand.java
License:Apache License
public void testActionStart() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());
    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);
    assertNotNull(action.getExternalId());
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    String launcherId = action.getExternalId();
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
From source file:org.apache.oozie.command.wf.TestActionStartXCommand.java
License:Apache License
/**
 * Test that a workflow action with a CDATA section and a string configuration that needs to be escaped can be
 * submitted without throwing exceptions.
 * <p/>
 * The escaped string needs to be 'escaped' before converting to an XML Document, otherwise an exception will be thrown.
 * @see org.apache.oozie.DagELFunctions#configureEvaluator(org.apache.oozie.util.ELEvaluator.ELEvaluator evaluator, org.apache.oozie.WorkflowJobBean, org.apache.oozie.WorkflowActionBean)
 *
 * @throws Exception thrown if failed to execute test case
 */
public void testActionWithEscapedStringAndCDATA() throws Exception {
    // create workflow job and action beans with escaped parameters and CDATA value
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTableWithEscapedStringAndCDATA(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTableWithEscapedStringAndCDATA(job.getId(),
            WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());
    // start workflow action
    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);
    assertNotNull(action.getExternalId());
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    String launcherId = action.getExternalId();
    // retrieve launcher job
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    // time out after 240 seconds unless the launcher job completes
    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    // check if launcher job succeeds
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
From source file:org.apache.oozie.service.TestRecoveryService.java
License:Apache License
/**
 * Tests functionality of the Recovery Service Runnable command. Starts an action with USER_RETRY status.
 * Runs the recovery runnable, and ensures the state changes to OK and the job completes successfully.
 *
 * @throws Exception
 */
public void testWorkflowActionRecoveryUserRetry() throws Exception {
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job1 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action1 = this.addRecordToWfActionTable(job1.getId(), "1", WorkflowAction.Status.USER_RETRY);
    WorkflowJobBean job2 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action2 = createWorkflowActionSetPending(job2.getId(), WorkflowAction.Status.USER_RETRY);
    // Default recovery created time is 7 days.
    action2.setCreatedTime(new Date(new Date().getTime() - 8 * RecoveryService.ONE_DAY_MILLISCONDS));
    WorkflowActionInsertJPAExecutor actionInsertCmd = new WorkflowActionInsertJPAExecutor(action2);
    jpaService.execute(actionInsertCmd);
    Runnable recoveryRunnable = new RecoveryRunnable(0, 60, 60);
    recoveryRunnable.run();
    sleep(3000);
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action1.getId());
    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            WorkflowActionBean a = jpaService.execute(wfActionGetCmd);
            return a.getExternalId() != null;
        }
    });
    action1 = jpaService.execute(wfActionGetCmd);
    assertNotNull(action1.getExternalId());
    assertEquals(WorkflowAction.Status.RUNNING, action1.getStatus());
    // Action 2 should not get recovered as its created time is older than 7 days
    action2 = WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, action2.getId());
    assertNull(action2.getExternalId());
    assertEquals(WorkflowAction.Status.USER_RETRY, action2.getStatus());
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job1, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    String group = conf.get("group.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    String launcherId = action1.getExternalId();
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java
License:Apache License
/**
 * Submit a Pig job to hadoop.
 *
 * @param mapFuncs
 *            a list of map functions to apply to the inputs. The cardinality of the list should
 *            be the same as input's cardinality.
 * @param groupFuncs
 *            a list of grouping functions to apply to the inputs. The cardinality of the list
 *            should be the same as input's cardinality.
 * @param reduceFunc
 *            the reduce function.
 * @param mapTasks
 *            the number of map tasks to use.
 * @param reduceTasks
 *            the number of reduce tasks to use.
 * @param input
 *            a list of inputs
 * @param output
 *            the path of the output.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;
    List<String> funcs = new ArrayList<String>();
    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }
    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");
        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));
        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that combiner is only called once
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }
        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15
        conf.setInputKeyClass(Text.class);
        // not used starting with 0.15
        conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));
        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));
        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);
        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }
        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());
        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }
            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else { // still running
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }
        // bug 1030028: if the input file is empty; hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }
        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs())).asElement(outputFile.toString());
            if (success && !descriptor.exists()) {
                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }
        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;
            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}
From source file:org.apache.pig.test.utils.datagen.HadoopRunner.java
License:Apache License
public void generate() throws IOException {
    // Configuration processed by ToolRunner
    // Create a JobConf using the processed conf
    JobConf job;
    if (conf != null) {
        // TODO: conf could be null, check when and why
        job = new JobConf(conf);
    } else {
        job = new JobConf(new Configuration());
    }
    fs = FileSystem.get(job);
    tmpHome = createTempDir(null);
    String config = genMapFiles().toUri().getRawPath();
    // set config properties into job conf
    job.set(COLUMN_CONF_FILE_PATH, config);
    job.set(COLUMN_OUTPUT_SEPARATOR, String.valueOf((int) dgConf.getSeparator()));
    job.setJobName("data-gen");
    job.setNumMapTasks(dgConf.getNumMappers());
    job.setNumReduceTasks(0);
    job.setMapperClass(DataGenMapper.class);
    job.setJarByClass(DataGenMapper.class);
    // if inFile is specified, use it as input
    if (dgConf.getInFile() != null) {
        FileInputFormat.setInputPaths(job, dgConf.getInFile());
        job.set(HAS_USER_INPUT, "true");
    } else {
        job.set(HAS_USER_INPUT, "false");
        Path input = genInputFiles();
        FileInputFormat.setInputPaths(job, input);
    }
    FileOutputFormat.setOutputPath(job, new Path(dgConf.getOutputFile()));
    // Submit the job, then poll for progress until the job is complete
    System.out.println("Submit hadoop job...");
    RunningJob j = JobClient.runJob(job);
    if (!j.isSuccessful()) {
        throw new IOException("Job failed");
    }
    if (fs.exists(tmpHome)) {
        fs.delete(tmpHome, true);
    }
}