List of usage examples for org.apache.hadoop.mapreduce.Job#submit()
public void submit() throws IOException, InterruptedException, ClassNotFoundException
From source file:org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine.java
License:Apache License
/** * {@inheritDoc}// w w w.ja v a 2 s . co m */ @Override public boolean submit(JobRequest mrJobRequest) { // We're supporting only map reduce jobs MRJobRequest request = (MRJobRequest) mrJobRequest; // Clone global configuration Configuration configuration = new Configuration(globalConfiguration); // Serialize driver context into job configuration for (Map.Entry<String, String> entry : request.getDriverContext()) { if (entry.getValue() == null) { LOG.warn("Ignoring null driver context value for key " + entry.getKey()); continue; } configuration.set(entry.getKey(), entry.getValue()); } // Serialize connector context as a sub namespace for (Map.Entry<String, String> entry : request.getConnectorContext(Direction.FROM)) { if (entry.getValue() == null) { LOG.warn("Ignoring null connector context value for key " + entry.getKey()); continue; } configuration.set(MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT + entry.getKey(), entry.getValue()); } for (Map.Entry<String, String> entry : request.getConnectorContext(Direction.TO)) { if (entry.getValue() == null) { LOG.warn("Ignoring null connector context value for key " + entry.getKey()); continue; } configuration.set(MRJobConstants.PREFIX_CONNECTOR_TO_CONTEXT + entry.getKey(), entry.getValue()); } // Set up notification URL if it's available if (request.getNotificationUrl() != null) { configuration.set("job.end.notification.url", request.getNotificationUrl()); } // Turn off speculative execution configuration.setBoolean("mapred.map.tasks.speculative.execution", false); configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false); // Promote all required jars to the job configuration.set("tmpjars", StringUtils.join(request.getJars(), ",")); try { Job job = new Job(configuration); // link configs MRConfigurationUtils.setConnectorLinkConfig(Direction.FROM, job, request.getConnectorLinkConfig(Direction.FROM)); MRConfigurationUtils.setConnectorLinkConfig(Direction.TO, job, 
request.getConnectorLinkConfig(Direction.TO)); // from and to configs MRConfigurationUtils.setConnectorJobConfig(Direction.FROM, job, request.getJobConfig(Direction.FROM)); MRConfigurationUtils.setConnectorJobConfig(Direction.TO, job, request.getJobConfig(Direction.TO)); MRConfigurationUtils.setDriverConfig(job, request.getDriverConfig()); MRConfigurationUtils.setConnectorSchema(Direction.FROM, job, request.getSummary().getFromSchema()); MRConfigurationUtils.setConnectorSchema(Direction.TO, job, request.getSummary().getToSchema()); if (request.getJobName() != null) { job.setJobName("Sqoop: " + request.getJobName()); } else { job.setJobName("Sqoop job with id: " + request.getJobId()); } job.setInputFormatClass(request.getInputFormatClass()); job.setMapperClass(request.getMapperClass()); job.setMapOutputKeyClass(request.getMapOutputKeyClass()); job.setMapOutputValueClass(request.getMapOutputValueClass()); // Set number of reducers as number of configured loaders or suppress // reduce phase entirely if loaders are not set at all. if (request.getLoaders() != null) { job.setNumReduceTasks(request.getLoaders()); } else { job.setNumReduceTasks(0); } job.setOutputFormatClass(request.getOutputFormatClass()); job.setOutputKeyClass(request.getOutputKeyClass()); job.setOutputValueClass(request.getOutputValueClass()); // If we're in local mode than wait on completion. Local job runner do not // seems to be exposing API to get previously submitted job which makes // other methods of the submission engine quite useless. if (isLocal()) { job.waitForCompletion(true); } else { job.submit(); } String jobId = job.getJobID().toString(); request.getSummary().setExternalId(jobId); request.getSummary().setExternalLink(job.getTrackingURL()); LOG.debug("Executed new map-reduce job with id " + jobId); } catch (Exception e) { request.getSummary().setException(e); LOG.error("Error in submitting job", e); return false; } return true; }
From source file:org.apache.tez.mapreduce.examples.OrderedWordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);/*from w ww . ja va 2 s. c o m*/ } // Configure intermediate reduces conf.setInt(MRJobConfig.MRR_INTERMEDIATE_STAGES, 1); // Set reducer class for intermediate reduce conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduce.class"), IntSumReducer.class, Reducer.class); // Set reducer output key class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.map.output.key.class"), IntWritable.class, Object.class); // Set reducer output value class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.map.output.value.class"), Text.class, Object.class); conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduces"), 2); @SuppressWarnings("deprecation") Job job = new Job(conf, "orderedwordcount"); job.setJarByClass(OrderedWordCount.class); // Configure map job.setMapperClass(TokenizerMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // Configure reduce job.setReducerClass(MyOrderByNoOpReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); YarnClient yarnClient = new YarnClientImpl(); yarnClient.init(conf); yarnClient.start(); TezClient tezClient = new TezClient(new TezConfiguration(conf)); job.submit(); JobID jobId = job.getJobID(); ApplicationId appId = TypeConverter.toYarn(jobId).getAppId(); DAGClient dagClient = tezClient.getDAGClient(appId); DAGStatus dagStatus = null; while (true) { dagStatus = dagClient.getDAGStatus(); if (dagStatus.getState() == 
DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() == DAGStatus.State.RUNNING) { try { ExampleDriver.printMRRDAGStatus(dagStatus); try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); System.exit(-1); } } ExampleDriver.printMRRDAGStatus(dagStatus); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); System.exit(dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1); }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@Test(timeout = 60000) public void testMRRSleepJob() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testMRRSleepJob()."); if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return;//from www. j a v a 2 s .com } Configuration sleepConf = new Configuration(mrrTezCluster.getConfig()); MRRSleepJob sleepJob = new MRRSleepJob(); sleepJob.setConf(sleepConf); Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1); job.setJarByClass(MRRSleepJob.class); job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); // FIXME once counters and task progress can be obtained properly // TODO use dag client to test counters and task progress? // what about completed jobs? }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@Test(timeout = 60000) public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testRandomWriter()."); if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return;/*w w w .j a v a2s . c o m*/ } RandomTextWriterJob randomWriterJob = new RandomTextWriterJob(); mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072"); mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024"); Job job = randomWriterJob.createJob(mrrTezCluster.getConfig()); Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output"); FileOutputFormat.setOutputPath(job, outputDir); job.setSpeculativeExecution(false); job.setJarByClass(RandomTextWriterJob.class); job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); // Make sure there are three files in the output-dir RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrrTezCluster.getConfig()) .listStatus(outputDir); int count = 0; while (iterator.hasNext()) { FileStatus file = iterator.next(); if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) { count++; } } Assert.assertEquals("Number of part files is wrong!", 3, count); }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@Test(timeout = 60000) public void testFailingJob() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testFailingJob()."); if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return;//from w ww . j ava 2s . c om } Configuration sleepConf = new Configuration(mrrTezCluster.getConfig()); MRRSleepJob sleepJob = new MRRSleepJob(); sleepJob.setConf(sleepConf); Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1); job.setJarByClass(MRRSleepJob.class); job.setMaxMapAttempts(1); // speed up failures job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true); job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*"); job.submit(); boolean succeeded = job.waitForCompletion(true); Assert.assertFalse(succeeded); Assert.assertEquals(JobStatus.State.FAILED, job.getJobState()); // FIXME once counters and task progress can be obtained properly // TODO verify failed task diagnostics }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@Test(timeout = 60000) public void testFailingAttempt() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testFailingAttempt()."); if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return;/*from w ww . j a v a 2 s.c o m*/ } Configuration sleepConf = new Configuration(mrrTezCluster.getConfig()); MRRSleepJob sleepJob = new MRRSleepJob(); sleepJob.setConf(sleepConf); Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1); job.setJarByClass(MRRSleepJob.class); job.setMaxMapAttempts(3); // speed up failures job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true); job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0"); job.submit(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); // FIXME once counters and task progress can be obtained properly // TODO verify failed task diagnostics }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@Test(timeout = 60000) public void testMRRSleepJobWithCompression() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testMRRSleepJobWithCompression()."); if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return;/*from w w w. ja v a 2 s . c o m*/ } Configuration sleepConf = new Configuration(mrrTezCluster.getConfig()); MRRSleepJob sleepJob = new MRRSleepJob(); sleepJob.setConf(sleepConf); Job job = sleepJob.createJob(1, 1, 2, 1, 1, 1, 1, 1, 1, 1); job.setJarByClass(MRRSleepJob.class); job.setMaxMapAttempts(1); // speed up failures // enable compression job.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); job.getConfiguration().set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, DefaultCodec.class.getName()); job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); // FIXME once counters and task progress can be obtained properly // TODO use dag client to test counters and task progress? // what about completed jobs? }
From source file:org.calrissian.accumulorecipes.entitystore.hadoop.EntityInputFormatTest.java
License:Apache License
/**
 * Saves one entity into a mock-instance entity store, runs a map-only job
 * whose input format queries for type "type" with {@code key1 == "val1"}, and
 * checks that the mapper collected exactly that entity (same id, type and
 * attribute set).
 */
@Test
public void testQuery() throws IOException, ClassNotFoundException, InterruptedException,
        AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException {

    Instance instance = new MockInstance("entityInst");
    Connector connector = instance.getConnector("root", "".getBytes());
    AccumuloEntityStore store = new AccumuloEntityStore(connector);
    entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1"))
            .attr(new Attribute("key2", false)).build();
    store.save(singleton(entity));

    Job job = Job.getInstance();
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0); // map-only job
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EntityInputFormat.class);
    EntityInputFormat.setMockInstance(job, "entityInst");
    EntityInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
    EntityInputFormat.setQueryInfo(job, Collections.singleton("type"),
            QueryBuilder.create().eq("key1", "val1").build(), DEFAULT_SHARD_BUILDER, LEXI_TYPES);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    // Expected value goes first so failure messages read the right way round.
    assertEquals(1, TestMapper.entities.size());
    assertEquals(entity.getId(), TestMapper.entities.get(0).getId());
    assertEquals(entity.getType(), TestMapper.entities.get(0).getType());
    assertEquals(new HashSet<Attribute>(entity.getAttributes()),
            new HashSet<Attribute>(TestMapper.entities.get(0).getAttributes()));
}
From source file:org.calrissian.accumulorecipes.entitystore.hadoop.EntityInputFormatTest.java
License:Apache License
@Test public void testGetAllByType() throws IOException, ClassNotFoundException, InterruptedException, AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException { Instance instance = new MockInstance("entityInst1"); Connector connector = instance.getConnector("root", "".getBytes()); AccumuloEntityStore store = new AccumuloEntityStore(connector); entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1")) .attr(new Attribute("key2", false)).build(); store.save(singleton(entity));//from w ww . ja v a2s . com entity2 = EntityBuilder.create("type", "id2").attr(new Attribute("key1", "val1")) .attr(new Attribute("key2", false)).build(); store.save(singleton(entity2)); entity3 = EntityBuilder.create("type1", "id").attr(new Attribute("key1", "val1")) .attr(new Attribute("key2", false)).build(); store.save(singleton(entity3)); Job job = new Job(new Configuration()); job.setJarByClass(getClass()); job.setMapperClass(TestMapper.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(EntityInputFormat.class); EntityInputFormat.setMockInstance(job, "entityInst1"); EntityInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations()); EntityInputFormat.setQueryInfo(job, Collections.singleton("type")); job.setOutputFormatClass(NullOutputFormat.class); job.submit(); job.waitForCompletion(true); assertEquals(2, TestMapper.entities.size()); System.out.println(TestMapper.entities); assertEquals(TestMapper.entities.get(0).getId(), entity.getId()); assertEquals(TestMapper.entities.get(0).getType(), entity.getType()); assertEquals(new HashSet<Attribute>(entity.getAttributes()), new HashSet<Attribute>(TestMapper.entities.get(1).getAttributes())); assertEquals(TestMapper.entities.get(1).getId(), entity2.getId()); assertEquals(TestMapper.entities.get(1).getType(), entity2.getType()); assertEquals(new HashSet<Attribute>(entity2.getAttributes()), new 
HashSet<Attribute>(TestMapper.entities.get(1).getAttributes())); }
From source file:org.calrissian.accumulorecipes.eventstore.hadoop.EventInputFormatTest.java
License:Apache License
/**
 * Saves one event into a mock-instance event store, runs a map-only job whose
 * input format queries the last 50 seconds for events of type "" with
 * {@code key1 == "val1"}, and checks that the mapper saw that event (same id,
 * same attribute set, timestamp within 50 ms).
 */
@Test
public void test() throws IOException, ClassNotFoundException, InterruptedException, AccumuloSecurityException,
        AccumuloException, TableExistsException, TableNotFoundException {

    Instance instance = new MockInstance("eventInst");
    Connector connector = instance.getConnector("root", "".getBytes());
    AccumuloEventStore store = new AccumuloEventStore(connector);
    event = EventBuilder.create("", UUID.randomUUID().toString(), System.currentTimeMillis())
            .attr(new Attribute("key1", "val1")).attr(new Attribute("key2", false)).build();
    store.save(singleton(event));

    Job job = new Job(new Configuration());
    job.setJarByClass(getClass());
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0); // map-only job
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(EventInputFormat.class);
    EventInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
    EventInputFormat.setMockInstance(job, "eventInst");
    EventInputFormat.setQueryInfo(job, new Date(System.currentTimeMillis() - 50000), new Date(),
            Collections.singleton(""), QueryBuilder.create().eq("key1", "val1").build());
    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    job.waitForCompletion(true);

    assertNotNull(TestMapper.entry);
    // Expected value goes first so failure messages read the right way round.
    assertEquals(event.getId(), TestMapper.entry.getId());
    // Compare the absolute difference: a one-sided check would also accept a
    // timestamp that is arbitrarily far in the past.
    assertTrue(Math.abs(TestMapper.entry.getTimestamp() - event.getTimestamp()) < 50);
    assertEquals(new HashSet<Attribute>(event.getAttributes()),
            new HashSet<Attribute>(TestMapper.entry.getAttributes()));
}