Example usage for org.apache.hadoop.mapreduce Job submit

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job submit.

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
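
Because submit() returns immediately (unlike waitForCompletion(), which blocks), the caller is responsible for monitoring the job. A minimal sketch of that pattern (the job name is hypothetical; the job is assumed to be otherwise fully configured):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SubmitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "submit-sketch");
        // ... set mapper, reducer, input/output formats and paths here ...

        job.submit(); // returns as soon as the job is handed to the cluster

        // Poll for completion instead of blocking in waitForCompletion()
        while (!job.isComplete()) {
            System.out.printf("map %.0f%% reduce %.0f%%%n",
                    job.mapProgress() * 100, job.reduceProgress() * 100);
            Thread.sleep(1000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}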

Usage

From source file:org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public boolean submit(JobRequest mrJobRequest) {
    // We support only map-reduce jobs
    MRJobRequest request = (MRJobRequest) mrJobRequest;

    // Clone global configuration
    Configuration configuration = new Configuration(globalConfiguration);

    // Serialize driver context into job configuration
    for (Map.Entry<String, String> entry : request.getDriverContext()) {
        if (entry.getValue() == null) {
            LOG.warn("Ignoring null driver context value for key " + entry.getKey());
            continue;
        }
        configuration.set(entry.getKey(), entry.getValue());
    }

    // Serialize connector context as a sub namespace
    for (Map.Entry<String, String> entry : request.getConnectorContext(Direction.FROM)) {
        if (entry.getValue() == null) {
            LOG.warn("Ignoring null connector context value for key " + entry.getKey());
            continue;
        }
        configuration.set(MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT + entry.getKey(), entry.getValue());
    }

    for (Map.Entry<String, String> entry : request.getConnectorContext(Direction.TO)) {
        if (entry.getValue() == null) {
            LOG.warn("Ignoring null connector context value for key " + entry.getKey());
            continue;
        }
        configuration.set(MRJobConstants.PREFIX_CONNECTOR_TO_CONTEXT + entry.getKey(), entry.getValue());
    }

    // Set up notification URL if it's available
    if (request.getNotificationUrl() != null) {
        configuration.set("job.end.notification.url", request.getNotificationUrl());
    }

    // Turn off speculative execution
    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    // Promote all required jars to the job
    configuration.set("tmpjars", StringUtils.join(request.getJars(), ","));

    try {
        Job job = new Job(configuration);

        // link configs
        MRConfigurationUtils.setConnectorLinkConfig(Direction.FROM, job,
                request.getConnectorLinkConfig(Direction.FROM));
        MRConfigurationUtils.setConnectorLinkConfig(Direction.TO, job,
                request.getConnectorLinkConfig(Direction.TO));

        // from and to configs
        MRConfigurationUtils.setConnectorJobConfig(Direction.FROM, job, request.getJobConfig(Direction.FROM));
        MRConfigurationUtils.setConnectorJobConfig(Direction.TO, job, request.getJobConfig(Direction.TO));

        MRConfigurationUtils.setDriverConfig(job, request.getDriverConfig());
        MRConfigurationUtils.setConnectorSchema(Direction.FROM, job, request.getSummary().getFromSchema());
        MRConfigurationUtils.setConnectorSchema(Direction.TO, job, request.getSummary().getToSchema());

        if (request.getJobName() != null) {
            job.setJobName("Sqoop: " + request.getJobName());
        } else {
            job.setJobName("Sqoop job with id: " + request.getJobId());
        }

        job.setInputFormatClass(request.getInputFormatClass());

        job.setMapperClass(request.getMapperClass());
        job.setMapOutputKeyClass(request.getMapOutputKeyClass());
        job.setMapOutputValueClass(request.getMapOutputValueClass());

        // Set the number of reducers to the number of configured loaders, or
        // suppress the reduce phase entirely if no loaders are set.
        if (request.getLoaders() != null) {
            job.setNumReduceTasks(request.getLoaders());
        } else {
            job.setNumReduceTasks(0);
        }

        job.setOutputFormatClass(request.getOutputFormatClass());
        job.setOutputKeyClass(request.getOutputKeyClass());
        job.setOutputValueClass(request.getOutputValueClass());

        // If we're in local mode, wait for completion. The local job runner
        // does not seem to expose an API for retrieving a previously
        // submitted job, which makes the other methods of the submission
        // engine quite useless.
        if (isLocal()) {
            job.waitForCompletion(true);
        } else {
            job.submit();
        }

        String jobId = job.getJobID().toString();
        request.getSummary().setExternalId(jobId);
        request.getSummary().setExternalLink(job.getTrackingURL());

        LOG.debug("Executed new map-reduce job with id " + jobId);
    } catch (Exception e) {
        request.getSummary().setException(e);
        LOG.error("Error in submitting job", e);
        return false;
    }
    return true;
}
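
Note the local-mode branch near the end: the local job runner does not let you look a job up again after submission, so the engine blocks with waitForCompletion(true) there. On a real cluster, a job submitted this way can be re-attached later through its stored external id. A sketch, assuming the external id saved into the summary above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobLookupSketch {
    // Hypothetical helper: re-attach to a previously submitted job using
    // the external id stored in the submission summary.
    static Job lookup(Configuration conf, String externalId)
            throws IOException, InterruptedException {
        Cluster cluster = new Cluster(conf);
        return cluster.getJob(JobID.forName(externalId)); // null if unknown
    }
}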

From source file:org.apache.tez.mapreduce.examples.OrderedWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    // Configure intermediate reduces
    conf.setInt(MRJobConfig.MRR_INTERMEDIATE_STAGES, 1);

    // Set reducer class for intermediate reduce
    conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduce.class"),
            IntSumReducer.class, Reducer.class);
    // Set reducer output key class
    conf.setClass(
            MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.map.output.key.class"),
            IntWritable.class, Object.class);
    // Set reducer output value class
    conf.setClass(
            MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.map.output.value.class"),
            Text.class, Object.class);
    conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduces"), 2);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "orderedwordcount");
    job.setJarByClass(OrderedWordCount.class);

    // Configure map
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure reduce
    job.setReducerClass(MyOrderByNoOpReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    YarnClient yarnClient = new YarnClientImpl();
    yarnClient.init(conf);
    yarnClient.start();

    TezClient tezClient = new TezClient(new TezConfiguration(conf));

    job.submit();
    JobID jobId = job.getJobID();
    ApplicationId appId = TypeConverter.toYarn(jobId).getAppId();

    DAGClient dagClient = tezClient.getDAGClient(appId);
    DAGStatus dagStatus = null;
    while (true) {
        dagStatus = dagClient.getDAGStatus();
        if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                || dagStatus.getState() == DAGStatus.State.FAILED
                || dagStatus.getState() == DAGStatus.State.KILLED
                || dagStatus.getState() == DAGStatus.State.ERROR) {
            break;
        }
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
            // continue;
        }
    }

    while (dagStatus.getState() == DAGStatus.State.RUNNING) {
        try {
            ExampleDriver.printMRRDAGStatus(dagStatus);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // continue;
            }
            dagStatus = dagClient.getDAGStatus();
        } catch (TezException e) {
            LOG.fatal("Failed to get application progress. Exiting");
            System.exit(-1);
        }
    }

    ExampleDriver.printMRRDAGStatus(dagStatus);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    System.exit(dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1);
}
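
The two polling loops above can be condensed into a single helper that waits for the DAG to leave its submission states. A sketch that keeps the old DAGClient API and the state names used in this example (package names are assumed from the Tez version shown here):

import java.io.IOException;
import java.util.EnumSet;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;

public class DagWaitSketch {
    // Poll until the DAG reaches RUNNING or a terminal state, instead of
    // spelling out each state in a long if condition.
    static DAGStatus waitUntilStarted(DAGClient dagClient)
            throws IOException, TezException, InterruptedException {
        EnumSet<DAGStatus.State> started = EnumSet.of(DAGStatus.State.RUNNING,
                DAGStatus.State.SUCCEEDED, DAGStatus.State.FAILED,
                DAGStatus.State.KILLED, DAGStatus.State.ERROR);
        DAGStatus status = dagClient.getDAGStatus();
        while (!started.contains(status.getState())) {
            Thread.sleep(500);
            status = dagClient.getDAGStatus();
        }
        return status;
    }
}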

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testMRRSleepJob() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testMRRSleepJob().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
            trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

    // FIXME once counters and task progress can be obtained properly
    // TODO use dag client to test counters and task progress?
    // what about completed jobs?
}
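
Calling waitForCompletion(true) right after submit(), as this test and the ones below do, is safe: waitForCompletion() submits the job only if it has not been submitted yet, and otherwise just monitors it. An equivalent sketch that makes the monitoring step explicit:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

public class SubmitAndMonitorSketch {
    // Equivalent to submit() followed by waitForCompletion(true):
    // monitorAndPrintJob() blocks and prints progress until the job ends.
    static boolean submitAndMonitor(Job job)
            throws IOException, InterruptedException, ClassNotFoundException {
        job.submit();                    // job id and tracking URL now valid
        return job.monitorAndPrintJob(); // true if the job succeeded
    }
}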

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testRandomWriter().");
    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
    mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
    mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
    Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
    Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setSpeculativeExecution(false);
    job.setJarByClass(RandomTextWriterJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
            trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

    // Make sure there are three files in the output-dir

    RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrrTezCluster.getConfig())
            .listStatus(outputDir);
    int count = 0;
    while (iterator.hasNext()) {
        FileStatus file = iterator.next();
        if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
            count++;
        }
    }
    Assert.assertEquals("Number of part files is wrong!", 3, count);

}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testFailingJob().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
    job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");

    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());

    // FIXME once counters and task progress can be obtained properly
    // TODO verify failed task diagnostics
}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testFailingAttempt().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(3); // speed up failures
    job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
    job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");

    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

    // FIXME once counters and task progress can be obtained properly
    // TODO verify failed task diagnostics
}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testMRRSleepJobWithCompression() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testMRRSleepJobWithCompression().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 2, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(1); // speed up failures

    // enable compression
    job.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration().set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, DefaultCodec.class.getName());

    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
            trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

    // FIXME once counters and task progress can be obtained properly
    // TODO use dag client to test counters and task progress?
    // what about completed jobs?

}

From source file:org.calrissian.accumulorecipes.entitystore.hadoop.EntityInputFormatTest.java

License:Apache License

@Test
public void testQuery() throws IOException, ClassNotFoundException, InterruptedException,
        AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException {

    Instance instance = new MockInstance("entityInst");
    Connector connector = instance.getConnector("root", "".getBytes());
    AccumuloEntityStore store = new AccumuloEntityStore(connector);
    entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1"))
            .attr(new Attribute("key2", false)).build();
    store.save(singleton(entity));

    Job job = Job.getInstance();
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EntityInputFormat.class);
    EntityInputFormat.setMockInstance(job, "entityInst");
    EntityInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
    EntityInputFormat.setQueryInfo(job, Collections.singleton("type"),
            QueryBuilder.create().eq("key1", "val1").build(), DEFAULT_SHARD_BUILDER, LEXI_TYPES);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    assertEquals(1, TestMapper.entities.size());
    assertEquals(TestMapper.entities.get(0).getId(), entity.getId());
    assertEquals(TestMapper.entities.get(0).getType(), entity.getType());
    assertEquals(new HashSet<Attribute>(TestMapper.entities.get(0).getAttributes()),
            new HashSet<Attribute>(entity.getAttributes()));

}
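
The TestMapper these tests rely on is not shown on this page. A hypothetical reconstruction (the input key/value types and the Entity import are assumptions) makes the assertions easier to follow: the mapper collects every entity into a static list, which the test thread can read after waitForCompletion() because the mock Accumulo instance runs the whole job in-process.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.accumulo.core.data.Key;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.calrissian.mango.domain.entity.Entity;

// Hypothetical sketch only; the real TestMapper is a static inner class
// of the test.
public class TestMapper extends Mapper<Key, Entity, Text, Text> {
    static List<Entity> entities = new ArrayList<Entity>();

    @Override
    protected void map(Key key, Entity value, Context context)
            throws IOException, InterruptedException {
        entities.add(value);
    }
}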

From source file:org.calrissian.accumulorecipes.entitystore.hadoop.EntityInputFormatTest.java

License:Apache License

@Test
public void testGetAllByType() throws IOException, ClassNotFoundException, InterruptedException,
        AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException {

    Instance instance = new MockInstance("entityInst1");
    Connector connector = instance.getConnector("root", "".getBytes());
    AccumuloEntityStore store = new AccumuloEntityStore(connector);
    entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1"))
            .attr(new Attribute("key2", false)).build();
    store.save(singleton(entity));

    entity2 = EntityBuilder.create("type", "id2").attr(new Attribute("key1", "val1"))
            .attr(new Attribute("key2", false)).build();
    store.save(singleton(entity2));

    entity3 = EntityBuilder.create("type1", "id").attr(new Attribute("key1", "val1"))
            .attr(new Attribute("key2", false)).build();
    store.save(singleton(entity3));

    Job job = new Job(new Configuration());
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EntityInputFormat.class);
    EntityInputFormat.setMockInstance(job, "entityInst1");
    EntityInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
    EntityInputFormat.setQueryInfo(job, Collections.singleton("type"));
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    assertEquals(2, TestMapper.entities.size());
    System.out.println(TestMapper.entities);
    assertEquals(TestMapper.entities.get(0).getId(), entity.getId());
    assertEquals(TestMapper.entities.get(0).getType(), entity.getType());
    assertEquals(new HashSet<Attribute>(entity.getAttributes()),
            new HashSet<Attribute>(TestMapper.entities.get(0).getAttributes()));
    assertEquals(TestMapper.entities.get(1).getId(), entity2.getId());
    assertEquals(TestMapper.entities.get(1).getType(), entity2.getType());
    assertEquals(new HashSet<Attribute>(entity2.getAttributes()),
            new HashSet<Attribute>(TestMapper.entities.get(1).getAttributes()));
}

From source file:org.calrissian.accumulorecipes.eventstore.hadoop.EventInputFormatTest.java

License:Apache License

@Test
public void test() throws IOException, ClassNotFoundException, InterruptedException, AccumuloSecurityException,
        AccumuloException, TableExistsException, TableNotFoundException {

    Instance instance = new MockInstance("eventInst");
    Connector connector = instance.getConnector("root", "".getBytes());
    AccumuloEventStore store = new AccumuloEventStore(connector);
    event = EventBuilder.create("", UUID.randomUUID().toString(), System.currentTimeMillis())
            .attr(new Attribute("key1", "val1")).attr(new Attribute("key2", false)).build();
    store.save(singleton(event));

    Job job = new Job(new Configuration());
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EventInputFormat.class);
    EventInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
    EventInputFormat.setMockInstance(job, "eventInst");
    EventInputFormat.setQueryInfo(job, new Date(System.currentTimeMillis() - 50000), new Date(),
            Collections.singleton(""), QueryBuilder.create().eq("key1", "val1").build());
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    assertNotNull(TestMapper.entry);
    assertEquals(TestMapper.entry.getId(), event.getId());
    assertTrue(TestMapper.entry.getTimestamp() - event.getTimestamp() < 50);
    assertEquals(new HashSet<Attribute>(TestMapper.entry.getAttributes()),
            new HashSet<Attribute>(event.getAttributes()));

}