List of usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
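Before the project-specific examples below, here is a minimal, self-contained sketch of the typical use of getConfiguration(): obtain the job's own Configuration and set properties on it before submission, so that the submitted tasks see those values. The property keys (my.app.table.name, my.app.max.versions) are illustrative placeholders, not constants from Hadoop or from the libraries shown below; the examples below use the older new Job() constructor, while this sketch uses Job.getInstance().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
  public static void main(String[] args) throws IOException {
    // Job.getInstance() copies the supplied Configuration into the job.
    Job job = Job.getInstance(new Configuration(), "example-job");

    // getConfiguration() returns the job's own Configuration; values set here
    // are what the job's tasks will see at runtime.
    Configuration conf = job.getConfiguration();
    conf.set("my.app.table.name", "users");   // illustrative key, not a Hadoop constant
    conf.setInt("my.app.max.versions", 3);    // illustrative key, not a Hadoop constant

    // Reading back from the same object confirms the values are in place.
    System.out.println(conf.get("my.app.table.name"));
    System.out.println(conf.getInt("my.app.max.versions", -1));
  }
}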
From source file: com.moz.fiji.mapreduce.gather.FijiGatherJobBuilder.java
License: Apache License

/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
  // Construct the gatherer instance.
  if (null == mGathererClass) {
    throw new JobConfigurationException("Must specify a gatherer.");
  }
  final Configuration conf = job.getConfiguration();

  // Serialize the gatherer class name into the job configuration.
  conf.setClass(FijiConfKeys.FIJI_GATHERER_CLASS, mGathererClass, FijiGatherer.class);

  if ((getJobOutput() instanceof HFileMapReduceJobOutput) && (null == mReducerClass)) {
    mReducerClass = IdentityReducer.class;
  }

  final StringBuilder name = new StringBuilder("Fiji gather: " + mGathererClass.getSimpleName());
  if (null != mReducerClass) {
    name.append(" / " + mReducerClass.getSimpleName());
  }
  job.setJobName(name.toString());

  mGatherer = ReflectionUtils.newInstance(mGathererClass, conf);
  mMapper.setConf(conf);
  mDataRequest = mGatherer.getDataRequest();

  // Construct the combiner instance (if specified).
  if (null != mCombinerClass) {
    mCombiner = ReflectionUtils.newInstance(mCombinerClass, conf);
  }

  // Construct the reducer instance (if specified).
  if (null != mReducerClass) {
    mReducer = ReflectionUtils.newInstance(mReducerClass, conf);
  }

  // Configure the table input job (requires mGatherer, mMapper and mReducer to be set):
  super.configureJob(job);

  // Some validation:
  if (getJobOutput() instanceof HFileMapReduceJobOutput) {
    if (mReducer instanceof IdentityReducer) {
      Preconditions.checkState(mGatherer.getOutputKeyClass() == HFileKeyValue.class,
          String.format("Gatherer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
              mGathererClass.getName(), mGatherer.getOutputKeyClass().getName()));
      Preconditions.checkState(mGatherer.getOutputValueClass() == NullWritable.class,
          String.format("Gatherer '%s' writing HFiles must output NullWritable values, but got '%s'",
              mGathererClass.getName(), mGatherer.getOutputValueClass().getName()));
    }
    Preconditions.checkState(mReducer.getOutputKeyClass() == HFileKeyValue.class,
        String.format("Reducer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
            mReducerClass.getName(), mReducer.getOutputKeyClass().getName()));
    Preconditions.checkState(mReducer.getOutputValueClass() == NullWritable.class,
        String.format("Reducer '%s' writing HFiles must output NullWritable values, but got '%s'",
            mReducerClass.getName(), mReducer.getOutputValueClass().getName()));
  }
}
From source file: com.moz.fiji.mapreduce.input.HTableMapReduceJobInput.java
License: Apache License

/** {@inheritDoc} */
@Override
public void configure(Job job) throws IOException {
  // Configure the input format class.
  super.configure(job);

  // Configure the input HTable name.
  job.getConfiguration().set(TableInputFormat.INPUT_TABLE, mTableName);
}
From source file: com.moz.fiji.mapreduce.input.TestFijiTableMapReduceJobInput.java
License: Apache License

@Test
public void testConfigure() throws IOException {
  final Job job = new Job();

  // Request the latest 3 versions of column 'info:email':
  FijiDataRequestBuilder builder = FijiDataRequest.builder();
  builder.newColumnsDef().withMaxVersions(3).add("info", "email");
  FijiDataRequest dataRequest = builder.build();

  // Read from 'here' to 'there':
  final EntityId startRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("here"));
  final EntityId limitRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("there"));
  final FijiRowFilter filter = new StripValueRowFilter();
  final FijiTableMapReduceJobInput.RowOptions rowOptions =
      FijiTableMapReduceJobInput.RowOptions.create(startRow, limitRow, filter);
  final MapReduceJobInput fijiTableJobInput =
      new FijiTableMapReduceJobInput(mTable.getURI(), dataRequest, rowOptions);
  fijiTableJobInput.configure(job);

  // Check that the job was configured correctly.
  final Configuration conf = job.getConfiguration();
  assertEquals(mTable.getURI(), FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build());

  final FijiDataRequest decoded = (FijiDataRequest) SerializationUtils
      .deserialize(Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_INPUT_DATA_REQUEST)));
  assertEquals(dataRequest, decoded);

  final String confStartRow = Base64.encodeBase64String(startRow.getHBaseRowKey());
  final String confLimitRow = Base64.encodeBase64String(limitRow.getHBaseRowKey());
  assertEquals(confStartRow, conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
  assertEquals(confLimitRow, conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
  assertEquals(filter.toJson().toString(), conf.get(FijiConfKeys.FIJI_ROW_FILTER));
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
    Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey,
    FijiRowFilter filter) throws Exception {
  final Job job = new Job(createConfiguration());
  final Configuration conf = job.getConfiguration();

  // Get settings for test.
  final FijiDataRequest request = FijiDataRequest.builder()
      .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

  job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

  // Setup the InputFormat.
  FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
  job.setInputFormatClass(HBaseFijiTableInputFormat.class);

  // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
  final List<Path> jarFiles = Lists.newArrayList();
  final FileSystem fs = FileSystem.getLocal(conf);
  for (String cpEntry : System.getProperty("java.class.path").split(":")) {
    if (cpEntry.endsWith(".jar")) {
      jarFiles.add(fs.makeQualified(new Path(cpEntry)));
    }
  }
  DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

  // Create a test job.
  job.setJobName(jobName);

  // Setup the OutputFormat.
  TextOutputFormat.setOutputPath(job, outputFile.getParent());
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Set the mapper class.
  if (null != mapperClass) {
    job.setMapperClass(mapperClass);
  }

  // Set the reducer class.
  if (null != reducerClass) {
    job.setReducerClass(reducerClass);
  }

  return job;
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapJob", outputFile, TestMapper.class,
      null, // reducer class
      null, // start key
      null, // limit key
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball",
      "gmail.com\tJohn Doe", "usermail.example.com\tChristophe Bisciglia",
      "usermail.example.com\tKiyan Ahmadizadeh", "gmail.com\tJane Doe",
      "usermail.example.com\tGarrett Wu");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
  final Path outputFile = createOutputFile();

  // Set the same entity IDs for start and limit, and we should get just the start row
  final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
  final byte[] endRowKey = startEntityId.getHBaseRowKey();
  final EntityId rawLimitEntityId = HBaseEntityId
      .fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

  // Create a test job.
  final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class,
      null, // reducer class
      startEntityId,
      rawLimitEntityId,
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
  final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
      new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class,
      null, // reducer class
      null, // start key
      null, // limit key
      filter);

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class,
      null, // start key
      null, // limit key
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));

  final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
  for (String line : output) {
    final String[] keyValue = line.split("\t");
    final String emailDomain = keyValue[0];
    final Set<String> names = Sets.newHashSet(keyValue[1].split(","));
    builder.put(emailDomain, names);
  }
  final Map<String, Set<String>> actual = builder.build();
  final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
      .put("usermail.example.com", Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia",
          "Kiyan Ahmadizadeh", "Garrett Wu"))
      .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe"))
      .build();
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.lib.reduce.TestAvroReducer.java
License: Apache License

@Test
public void testMapReduce() throws IOException {
  MyAvroReducer reducer = new MyAvroReducer();

  // Configure a job.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework.
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Run the reducer.
  ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver =
      new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  driver.setReducer(reducer);
  driver.withConfiguration(job.getConfiguration());
  driver.withInput(new Text("foo"),
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
  List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
  assertEquals(1, output.size());
  assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
From source file: com.moz.fiji.mapreduce.lib.reduce.TestMergeNodeReducer.java
License: Apache License

@Test
public void testMergeNodeReducer() throws IOException {
  MergeNodeReducer<Text> reducer = new MergeNodeReducer<Text>();
  ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>> driver =
      new ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>>();
  driver.setReducer(reducer);

  // Configure avro serialization.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // in the real hadoop mapreduce framework.
  AvroJob.setMapOutputValueSchema(job, reducer.getAvroValueWriterSchema());
  AvroJob.setOutputValueSchema(job, reducer.getAvroValueWriterSchema());
  driver.withConfiguration(job.getConfiguration());

  // Here's what our node graph looks like.
  //
  // X: A/1.0 ----b/2.0---> C/3.0
  // Y: A/1.0 ----b/2.0---> C/6.0
  // Z: A/1.0 ----f/1.0---> C/7.0
  // W: A/1.0 ----d/4.0---> E/5.0
  //
  driver.withInputKey(new Text("A"));
  Node node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(3.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(6.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("f").setWeight(1.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(7.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("d").setWeight(4.0)
          .setTarget(new NodeBuilder().setLabel("E").setWeight(5.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));

  //
  // A/4.0 ----b/4.0---> C/9.0
  //       \---d/4.0---> E/5.0
  //       \---f/1.0---> C/7.0
  //
  List<Pair<Text, AvroValue<Node>>> actual = driver.run();
  assertEquals(1, actual.size());
  assertEquals("A", actual.get(0).getFirst().toString());
  Node actualNode = actual.get(0).getSecond().datum();
  assertNotNull(actualNode);
  assertEquals("A", actualNode.getLabel().toString());
  assertEquals(4.0, actualNode.getWeight(), 1e-8);
  assertEquals(3, actualNode.getEdges().size());
  assertEquals("b", actualNode.getEdges().get(0).getLabel().toString());
  assertEquals(4.0, actualNode.getEdges().get(0).getWeight(), 1e-8);
  assertEquals("C", actualNode.getEdges().get(0).getTarget().getLabel().toString());
  assertEquals(9.0, actualNode.getEdges().get(0).getTarget().getWeight(), 1e-8);
  assertEquals("d", actualNode.getEdges().get(1).getLabel().toString());
  assertEquals(4.0, actualNode.getEdges().get(1).getWeight(), 1e-8);
  assertEquals("E", actualNode.getEdges().get(1).getTarget().getLabel().toString());
  assertEquals(5.0, actualNode.getEdges().get(1).getTarget().getWeight(), 1e-8);
  assertEquals("f", actualNode.getEdges().get(2).getLabel().toString());
  assertEquals(1.0, actualNode.getEdges().get(2).getWeight(), 1e-8);
  assertEquals("C", actualNode.getEdges().get(2).getTarget().getLabel().toString());
  assertEquals(7.0, actualNode.getEdges().get(2).getTarget().getWeight(), 1e-8);
}