Example usage for org.apache.hadoop.mapred RunningJob isSuccessful

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.RunningJob.isSuccessful().

Prototype

public boolean isSuccessful() throws IOException;

Document

Check if the job completed successfully.
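
A minimal usage sketch (assuming an already configured org.apache.hadoop.mapred.JobConf; the method name and identifiers below are illustrative only and are not taken from the examples that follow):

// Submit a job, poll until it finishes, then report whether it succeeded.
public static boolean runAndCheck(org.apache.hadoop.mapred.JobConf conf)
        throws java.io.IOException, InterruptedException {
    org.apache.hadoop.mapred.JobClient jobClient = new org.apache.hadoop.mapred.JobClient(conf);
    org.apache.hadoop.mapred.RunningJob job = jobClient.submitJob(conf);
    while (!job.isComplete()) { // poll until the job has finished, successfully or not
        Thread.sleep(1000);
    }
    return job.isSuccessful(); // true only if the job completed without failing
}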

Usage

From source file: org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java

License: Apache License

public void testSqoopEval() throws Exception {
    createDB();

    Context context = createContext(getActionXmlEval());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    assertEquals(0, context.getAction().getExternalChildIDs().length());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertTrue(hadoopCounters.isEmpty());

    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertEquals(0, outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length());
}

From source file: org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java

License: Apache License

public void testSqoopActionFreeFormQuery() throws Exception {
    createDB();

    Context context = createContext(getActionXmlFreeFromQuery());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertFalse(hadoopCounters.isEmpty());

    FileSystem fs = getFileSystem();
    FileStatus[] parts = fs.listStatus(new Path(getSqoopOutputDir()), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part-");
        }
    });
    int count = 0;
    for (FileStatus part : parts) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(part.getPath())));
        String line = br.readLine();
        while (line != null) {
            assertTrue(line.contains("a"));
            count++;
            line = br.readLine();
        }
        br.close();
    }
    assertEquals(3, count);

    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
}

From source file: org.apache.oozie.command.wf.TestActionCheckXCommand.java

License: Apache License

public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();

    assertTrue(launcherId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action.getExternalStatus());

}

From source file: org.apache.oozie.command.wf.TestActionCheckXCommand.java

License: Apache License

public void testActionCheckTransientDuringLauncher() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(),
            new String[] { "org.apache.oozie.service.ActionCheckerService" });
    services.init();

    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);

    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();

    // At this point, the launcher job has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the launcher job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {
        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();

            waitFor(30 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());

            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());

            // At this point, the action has hit a transient error even after maxRetries attempts, so the workflow
            // has been SUSPENDED
        }
    });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job2, action1, false, false);
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action2.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();
    assertFalse(originalLauncherId.equals(launcherId));

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalId();
    String childId = action4.getExternalChildIDs();

    assertTrue(launcherId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action5.getExternalStatus());
}

From source file: org.apache.oozie.command.wf.TestActionCheckXCommand.java

License: Apache License

public void testActionCheckTransientDuringMRAction() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(),
            new String[] { "org.apache.oozie.service.ActionCheckerService" });
    services.init();

    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);

    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job0, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(originalLauncherId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action1.getId()).call();
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    String originalMapperId = action2.getExternalChildIDs();

    assertFalse(originalLauncherId.equals(originalMapperId));

    // At this point, the launcher job has finished and the map-reduce action has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the map-reduce job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {
        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();

            waitFor(30 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {
                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());

            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());

            // At this point, the action has hit a transient error even after maxRetries attempts, so the workflow
            // has been SUSPENDED
        }
    });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());

    sleep(500);

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();

    assertFalse(originalLauncherId.equals(launcherId));

    final RunningJob launcherJob2 = jobClient.getJob(JobID.forName(launcherId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return launcherJob2.isComplete();
        }
    });

    assertTrue(launcherJob2.isSuccessful());
    actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalChildIDs();
    assertFalse(originalMapperId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(mapperId));

    waitFor(120 * 1000, new Predicate() {
        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action5.getExternalStatus());
}

From source file: org.apache.oozie.command.wf.TestActionStartXCommand.java

License: Apache License

public void testActionStart() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);
    assertNotNull(action.getExternalId());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}

From source file: org.apache.oozie.command.wf.TestActionStartXCommand.java

License: Apache License

/**
 * Test that a workflow action with a CDATA section and a string configuration that needs to be escaped can be
 * submitted without throwing exceptions.
 * <p/>
 * The escaped string needs to be 'escaped' before it is converted to an XML Document; otherwise an
 * exception will be thrown.
 * @see org.apache.oozie.DagELFunctions#configureEvaluator(org.apache.oozie.util.ELEvaluator.ELEvaluator evaluator, org.apache.oozie.WorkflowJobBean, org.apache.oozie.WorkflowActionBean)
 *
 * @throws Exception thrown if failed to execute test case
 */
public void testActionWithEscapedStringAndCDATA() throws Exception {
    // create workflow job and action beans with escaped parameters and CDATA value
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job = this.addRecordToWfJobTableWithEscapedStringAndCDATA(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action = this.addRecordToWfActionTableWithEscapedStringAndCDATA(job.getId(),
            WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    // start workflow action
    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);
    assertNotNull(action.getExternalId());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    // retrieve launcher job
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    // time out after 240 seconds unless the launcher job completes
    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    // check if launcher job succeeds
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
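
The Javadoc above notes that a string value containing XML special characters must be escaped before the action XML is converted to an XML Document. As a rough illustration only (not part of the Oozie test above; it assumes commons-lang's StringEscapeUtils is available on the classpath):

// Illustrative sketch: escape a raw value before embedding it in action XML.
String rawValue = "select * from t where a < 5 && b > 2";
String escaped = org.apache.commons.lang.StringEscapeUtils.escapeXml(rawValue);
String actionXml = "<property><name>query</name><value>" + escaped + "</value></property>";
// Parsing the escaped XML (for example with XmlUtils.parseXml(actionXml)) succeeds, whereas
// embedding the unescaped value would cause a parse exception.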

From source file: org.apache.oozie.service.TestRecoveryService.java

License: Apache License

/**
 * Tests functionality of the Recovery Service Runnable command. <p> Starts an action with USER_RETRY status.
 * Runs the recovery runnable, and ensures the state changes to OK and the job completes successfully.
 *
 * @throws Exception
 */
public void testWorkflowActionRecoveryUserRetry() throws Exception {
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job1 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action1 = this.addRecordToWfActionTable(job1.getId(), "1",
            WorkflowAction.Status.USER_RETRY);

    WorkflowJobBean job2 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING,
            WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action2 = createWorkflowActionSetPending(job2.getId(), WorkflowAction.Status.USER_RETRY);
    // The default recovery window, based on created time, is 7 days.
    action2.setCreatedTime(new Date(new Date().getTime() - 8 * RecoveryService.ONE_DAY_MILLISCONDS));
    WorkflowActionInsertJPAExecutor actionInsertCmd = new WorkflowActionInsertJPAExecutor(action2);
    jpaService.execute(actionInsertCmd);

    Runnable recoveryRunnable = new RecoveryRunnable(0, 60, 60);
    recoveryRunnable.run();
    sleep(3000);

    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action1.getId());

    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            WorkflowActionBean a = jpaService.execute(wfActionGetCmd);
            return a.getExternalId() != null;
        }
    });
    action1 = jpaService.execute(wfActionGetCmd);
    assertNotNull(action1.getExternalId());
    assertEquals(WorkflowAction.Status.RUNNING, action1.getStatus());

    // Action 2 should not get recovered, as its created time is older than 7 days
    action2 = WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, action2.getId());
    assertNull(action2.getExternalId());
    assertEquals(WorkflowAction.Status.USER_RETRY, action2.getStatus());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job1, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    String group = conf.get("group.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action1.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java

License: Apache License

/**
 * Submit a Pig job to hadoop.
 * 
 * @param mapFuncs
 *            a list of map functions to apply to the inputs. The cardinality of the list should
 *            be the same as input's cardinality.
 * @param groupFuncs
 *            a list of grouping functions to apply to the inputs. The cardinality of the list
 *            should be the same as input's cardinality.
 * @param reduceFunc
 *            the reduce function.
 * @param mapTasks
 *            the number of map tasks to use.
 * @param reduceTasks
 *            the number of reduce tasks to use.
 * @param input
 *            a list of inputs
 * @param output
 *            the path of the output.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;
    List<String> funcs = new ArrayList<String>();

    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");

        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));

        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that combiner is only called once
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }
        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15 conf.setInputKeyClass(Text.class);
        // not used starting with 0.15 conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));

        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }

        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else // still running
            {
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }

        // bug 1030028: if the input file is empty; hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }

        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))
                    .asElement(outputFile.toString());

            if (success && !descriptor.exists()) {

                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;

            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}

From source file: org.apache.pig.test.utils.datagen.HadoopRunner.java

License: Apache License

public void generate() throws IOException {
    // Configuration processed by ToolRunner

    // Create a JobConf using the processed conf
    JobConf job;
    if (conf != null) { // TODO: conf could be null, check when and why
        job = new JobConf(conf);
    } else {
        job = new JobConf(new Configuration());
    }
    fs = FileSystem.get(job);

    tmpHome = createTempDir(null);

    String config = genMapFiles().toUri().getRawPath();
    // set config properties into job conf
    job.set(COLUMN_CONF_FILE_PATH, config);
    job.set(COLUMN_OUTPUT_SEPARATOR, String.valueOf((int) dgConf.getSeparator()));

    job.setJobName("data-gen");
    job.setNumMapTasks(dgConf.getNumMappers());
    job.setNumReduceTasks(0);
    job.setMapperClass(DataGenMapper.class);
    job.setJarByClass(DataGenMapper.class);

    // if inFile is specified, use it as input
    if (dgConf.getInFile() != null) {
        FileInputFormat.setInputPaths(job, dgConf.getInFile());
        job.set(HAS_USER_INPUT, "true");
    } else {
        job.set(HAS_USER_INPUT, "false");
        Path input = genInputFiles();
        FileInputFormat.setInputPaths(job, input);
    }
    FileOutputFormat.setOutputPath(job, new Path(dgConf.getOutputFile()));

    // Submit the job, then poll for progress until the job is complete
    System.out.println("Submit hadoop job...");
    RunningJob j = JobClient.runJob(job);
    if (!j.isSuccessful()) {
        throw new IOException("Job failed");
    }

    if (fs.exists(tmpHome)) {
        fs.delete(tmpHome, true);
    }
}
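
For comparison with the polling loops above, RunningJob also offers a blocking waitForCompletion(). A minimal sketch along the lines of the generate() method above, assuming 'job' is an already configured JobConf:

// Submit, block until completion, then branch on isSuccessful().
JobClient client = new JobClient(job);
RunningJob running = client.submitJob(job);
running.waitForCompletion(); // blocks until the job finishes
if (!running.isSuccessful()) {
    throw new IOException("Job " + running.getID() + " failed");
}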