List of usage examples for the org.apache.hadoop.mapreduce.Job no-argument constructor
@Deprecated public Job() throws IOException
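Note: this no-argument constructor is deprecated; since Hadoop 2 the recommended way to create a job is the static factory Job.getInstance(). A minimal sketch of the replacement (the job name here is illustrative):

    // Preferred replacement for "new Job()" since Hadoop 2.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "example-job");

The examples below all use the deprecated constructor, which still compiles and behaves equivalently.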
From source file:com.linkedin.cubert.utils.CubertMD.java
License:Open Source License
public static HashMap<String, String> readMetafile(String metaFilePath) throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);
    HashMap<String, String> result = new HashMap<String, String>();
    FSDataInputStream inStream;
    try {
        inStream = fs.open(new Path(metaFilePath + "/.meta"));
        BufferedReader breader = new BufferedReader(new InputStreamReader(inStream));
        String line;
        while ((line = breader.readLine()) != null) {
            String[] splits = line.split("\\s+");
            result.put(splits[0], splits[1]);
        }
    } catch (IOException e) {
        return result;
    }
    return result;
}
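For context, readMetafile expects the file at metaFilePath + "/.meta" to contain one whitespace-separated key/value pair per line, and it returns an empty (or partial) map if the file cannot be read. A hypothetical caller (the path and key are made up for illustration):

    // Hypothetical usage; the path and the "schema" key are illustrative only.
    HashMap<String, String> meta = CubertMD.readMetafile("/data/cubert/output");
    String value = meta.get("schema");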
From source file:com.moz.fiji.mapreduce.input.TestFijiTableMapReduceJobInput.java
License:Apache License
@Test
public void testConfigure() throws IOException {
    final Job job = new Job();

    // Request the latest 3 versions of column 'info:email':
    FijiDataRequestBuilder builder = FijiDataRequest.builder();
    builder.newColumnsDef().withMaxVersions(3).add("info", "email");
    FijiDataRequest dataRequest = builder.build();

    // Read from 'here' to 'there':
    final EntityId startRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("here"));
    final EntityId limitRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("there"));
    final FijiRowFilter filter = new StripValueRowFilter();
    final FijiTableMapReduceJobInput.RowOptions rowOptions =
        FijiTableMapReduceJobInput.RowOptions.create(startRow, limitRow, filter);
    final MapReduceJobInput fijiTableJobInput =
        new FijiTableMapReduceJobInput(mTable.getURI(), dataRequest, rowOptions);
    fijiTableJobInput.configure(job);

    // Check that the job was configured correctly.
    final Configuration conf = job.getConfiguration();
    assertEquals(mTable.getURI(), FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build());

    final FijiDataRequest decoded = (FijiDataRequest) SerializationUtils
            .deserialize(Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_INPUT_DATA_REQUEST)));
    assertEquals(dataRequest, decoded);

    final String confStartRow = Base64.encodeBase64String(startRow.getHBaseRowKey());
    final String confLimitRow = Base64.encodeBase64String(limitRow.getHBaseRowKey());
    assertEquals(confStartRow, conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
    assertEquals(confLimitRow, conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
    assertEquals(filter.toJson().toString(), conf.get(FijiConfKeys.FIJI_ROW_FILTER));
}
From source file:com.moz.fiji.mapreduce.lib.reduce.TestAvroReducer.java
License:Apache License
@Test
public void testMapReduce() throws IOException {
    MyAvroReducer reducer = new MyAvroReducer();

    // Configure a job.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // the real hadoop mapreduce framework.
    AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
    AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
    AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

    // Run the reducer.
    ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver =
        new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
    driver.setReducer(reducer);
    driver.withConfiguration(job.getConfiguration());
    driver.withInput(new Text("foo"),
        Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
    List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
    assertEquals(1, output.size());
    assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
From source file:com.moz.fiji.mapreduce.lib.reduce.TestMergeNodeReducer.java
License:Apache License
@Test
public void testMergeNodeReducer() throws IOException {
    MergeNodeReducer<Text> reducer = new MergeNodeReducer<Text>();
    ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>> driver =
        new ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>>();
    driver.setReducer(reducer);

    // Configure avro serialization.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // in the real hadoop mapreduce framework.
    AvroJob.setMapOutputValueSchema(job, reducer.getAvroValueWriterSchema());
    AvroJob.setOutputValueSchema(job, reducer.getAvroValueWriterSchema());
    driver.withConfiguration(job.getConfiguration());

    // Here's what our node graph looks like.
    //
    //  X: A/1.0 ----b/2.0---> C/3.0
    //  Y: A/1.0 ----b/2.0---> C/6.0
    //  Z: A/1.0 ----f/1.0---> C/7.0
    //  W: A/1.0 ----d/4.0---> E/5.0
    //
    driver.withInputKey(new Text("A"));
    Node node = new NodeBuilder().setLabel("A").setWeight(1.0)
        .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
            .setTarget(new NodeBuilder().setLabel("C").setWeight(3.0).build()).build())
        .build();
    driver.withInputValue(new AvroValue<Node>(node));
    node = new NodeBuilder().setLabel("A").setWeight(1.0)
        .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
            .setTarget(new NodeBuilder().setLabel("C").setWeight(6.0).build()).build())
        .build();
    driver.withInputValue(new AvroValue<Node>(node));
    node = new NodeBuilder().setLabel("A").setWeight(1.0)
        .addEdge(new EdgeBuilder().setLabel("f").setWeight(1.0)
            .setTarget(new NodeBuilder().setLabel("C").setWeight(7.0).build()).build())
        .build();
    driver.withInputValue(new AvroValue<Node>(node));
    node = new NodeBuilder().setLabel("A").setWeight(1.0)
        .addEdge(new EdgeBuilder().setLabel("d").setWeight(4.0)
            .setTarget(new NodeBuilder().setLabel("E").setWeight(5.0).build()).build())
        .build();
    driver.withInputValue(new AvroValue<Node>(node));

    // Expected output after merging:
    //
    //  A/4.0 ----b/4.0---> C/9.0
    //       \---d/4.0---> E/5.0
    //       \---f/1.0---> C/7.0
    //
    List<Pair<Text, AvroValue<Node>>> actual = driver.run();
    assertEquals(1, actual.size());
    assertEquals("A", actual.get(0).getFirst().toString());
    Node actualNode = actual.get(0).getSecond().datum();
    assertNotNull(actualNode);
    assertEquals("A", actualNode.getLabel().toString());
    assertEquals(4.0, actualNode.getWeight(), 1e-8);
    assertEquals(3, actualNode.getEdges().size());
    assertEquals("b", actualNode.getEdges().get(0).getLabel().toString());
    assertEquals(4.0, actualNode.getEdges().get(0).getWeight(), 1e-8);
    assertEquals("C", actualNode.getEdges().get(0).getTarget().getLabel().toString());
    assertEquals(9.0, actualNode.getEdges().get(0).getTarget().getWeight(), 1e-8);
    assertEquals("d", actualNode.getEdges().get(1).getLabel().toString());
    assertEquals(4.0, actualNode.getEdges().get(1).getWeight(), 1e-8);
    assertEquals("E", actualNode.getEdges().get(1).getTarget().getLabel().toString());
    assertEquals(5.0, actualNode.getEdges().get(1).getTarget().getWeight(), 1e-8);
    assertEquals("f", actualNode.getEdges().get(2).getLabel().toString());
    assertEquals(1.0, actualNode.getEdges().get(2).getWeight(), 1e-8);
    assertEquals("C", actualNode.getEdges().get(2).getTarget().getLabel().toString());
    assertEquals(7.0, actualNode.getEdges().get(2).getTarget().getWeight(), 1e-8);
}
From source file:com.moz.fiji.mapreduce.lib.reduce.TestNodeReducer.java
License:Apache License
@Test
public void testMapReduce() throws IOException {
    MyNodeReducer reducer = new MyNodeReducer();

    // Configure a job.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // the real hadoop mapreduce framework.
    AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
    AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
    AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

    ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver =
        new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
    driver.setReducer(reducer);
    driver.withConfiguration(job.getConfiguration());
    driver.withInput(new Text("foo"),
        Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
    List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
    assertEquals(1, output.size());
    assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
From source file:com.moz.fiji.mapreduce.output.TestFijiTableMapReduceJobOutput.java
License:Apache License
/** Test that mapper speculative execution is disabled for FijiTableMapReduceJobOutput. */
@Test
public void testSpecExDisabled() throws Exception {
    final Fiji fiji = getFiji();
    final FijiTableLayout layout = FijiTableLayout
            .createUpdatedLayout(FijiTableLayouts.getLayout(FijiTableLayouts.SIMPLE), null);
    fiji.createTable("table", layout);
    FijiURI tableURI = FijiURI.newBuilder(fiji.getURI()).withTableName("table").build();

    final Job job = new Job();
    new DirectFijiTableMapReduceJobOutput(tableURI).configure(job);

    final Configuration conf = job.getConfiguration();
    boolean isMapSpecExEnabled = conf.getBoolean("mapred.map.tasks.speculative.execution", true);
    assertFalse(isMapSpecExEnabled);
}
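The property checked above is the legacy Hadoop 1 key. If you need the same effect without a Fiji job output, a minimal sketch using the standard Hadoop Job API:

    // Disable map-side speculative execution directly on a job.
    // "mapreduce.map.speculative" is the Hadoop 2+ key; the legacy key read by
    // the test above is "mapred.map.tasks.speculative.execution".
    Job job = Job.getInstance(new Configuration());
    job.setMapSpeculativeExecution(false);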
From source file:com.moz.fiji.mapreduce.output.TestFileMapReduceJobOutput.java
License:Apache License
@Test
public void testConfigure() throws ClassNotFoundException, IOException {
    final Path filePath = new Path("foo/bar");
    final int numSplits = 42;
    final Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class;
    FileMapReduceJobOutput jobOutput =
        new ConcreteFileMapReduceJobOutput(filePath, numSplits, outputFormatClass);
    Job job = new Job();
    jobOutput.configure(job);

    // The output format class should be set in the job configuration.
    assertEquals(outputFormatClass, job.getOutputFormatClass());

    // The file output path should be set in the job configuration.
    // (Use assertTrue rather than the assert keyword, which is disabled by default.)
    assertTrue(FileOutputFormat.getOutputPath(job).toString().endsWith(filePath.toString()));

    // The number of reduce tasks should be set to the number of splits.
    assertEquals(numSplits, job.getNumReduceTasks());
}
From source file:com.moz.fiji.mapreduce.TestDistributedCacheJars.java
License:Apache License
/**
 * Pre: Requires the temp directory to be set and filled (only) with .jar files.
 * These don't need to actually be jars.
 *
 * Creates a new Job and checks that jars de-dupe.
 *
 * @throws IOException if the configuration cannot be created.
 */
@Test
public void testJarsDeDupe() throws IOException {
    final File tempDir = getLocalTempDir();

    // Jar list should de-dupe to {"myjar_a.jar", "myjar_b.jar", "myjar_0.jar", "myjar_1.jar"}.
    Set<String> dedupedJarNames = new HashSet<String>(4);
    dedupedJarNames.add("myjar_a.jar");
    dedupedJarNames.add("myjar_b.jar");
    dedupedJarNames.add("myjar_0.jar");
    dedupedJarNames.add("myjar_1.jar");

    Job job = new Job();
    List<String> someJars = new ArrayList<String>();

    // Some unique jar names.
    someJars.add("/somepath/myjar_a.jar");
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/myjar_0.jar");

    // Duplicate jars.
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/yet/another/path/myjar_b.jar");

    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(someJars, ","));

    // Now add some duplicate jars from the temp directory.
    assertEquals(0, tempDir.list().length);
    createTestJars(tempDir, "myjar_0.jar", "myjar_1.jar");
    assertEquals(2, tempDir.list().length);
    DistributedCacheJars.addJarsToDistributedCache(job, tempDir);

    // Confirm each jar appears in the de-duped list exactly once.
    String listedJars = job.getConfiguration().get(CONF_TMPJARS);
    String[] jars = listedJars.split(",");
    for (String jar : jars) {
        // Check that the path terminates in an expected jar.
        Path p = new Path(jar);
        assertTrue(dedupedJarNames.contains(p.getName()));
        dedupedJarNames.remove(p.getName());
    }
    assertEquals(0, dedupedJarNames.size());
}
From source file:com.moz.fiji.schema.mapreduce.TestDistributedCacheJars.java
License:Apache License
/**
 * Pre: Requires mTempDir to be set and filled (only) with .jar files.
 * These don't need to actually be jars.
 *
 * Creates a new Job and checks that jars de-dupe.
 *
 * @throws IOException if the configuration cannot be created.
 */
@Test
public void testJarsDeDupe() throws IOException {
    // Jar list should de-dupe to {"myjar_a.jar", "myjar_b.jar", "myjar_0.jar", "myjar_1.jar"}.
    Set<String> dedupedJarNames = new HashSet<String>(4);
    dedupedJarNames.add("myjar_a.jar");
    dedupedJarNames.add("myjar_b.jar");
    dedupedJarNames.add("myjar_0.jar");
    dedupedJarNames.add("myjar_1.jar");

    Job job = new Job();
    List<String> someJars = new ArrayList<String>();

    // Some unique jar names.
    someJars.add("/somepath/myjar_a.jar");
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/myjar_0.jar");

    // Duplicate jars.
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/yet/another/path/myjar_b.jar");

    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(someJars, ","));

    // Now add some duplicate jars from mTempDir.
    assertEquals(0, mTempDir.getRoot().list().length);
    createTestJars("myjar_0.jar", "myjar_1.jar");
    assertEquals(2, mTempDir.getRoot().list().length);
    DistributedCacheJars.addJarsToDistributedCache(job, mTempDir.getRoot());

    // Confirm each jar appears in the de-duped list exactly once.
    String listedJars = job.getConfiguration().get(CONF_TMPJARS);
    String[] jars = listedJars.split(",");
    for (String jar : jars) {
        // Check that the path terminates in an expected jar.
        Path p = new Path(jar);
        assertTrue(dedupedJarNames.contains(p.getName()));
        dedupedJarNames.remove(p.getName());
    }
    assertEquals(0, dedupedJarNames.size());
}
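As the two tests above exercise, the de-dupe keys on the jar's file name rather than its full path. A minimal self-contained sketch of that idea (an illustration, not the Fiji implementation):

    // Keep the first path seen for each distinct jar file name.
    Map<String, String> firstPathByName = new LinkedHashMap<String, String>();
    for (String jarPath : someJars) {
        String name = new Path(jarPath).getName();
        if (!firstPathByName.containsKey(name)) {
            firstPathByName.put(name, jarPath);
        }
    }
    String dedupedTmpJars = StringUtils.join(firstPathByName.values(), ",");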
From source file:com.nikoo28.excel.mapreduce.ExcelDriver.java
License:Apache License
/**
 * Main entry point for the example.
 *
 * @param args arguments
 * @throws Exception when something goes wrong
 */
public static void main(String[] args) throws Exception {
    logger.info("Driver started");

    Job job = new Job();
    job.setJarByClass(ExcelDriver.class);
    job.setJobName("Excel Record Reader");

    job.setMapperClass(ExcelMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(ExcelInputFormat.class);

    job.waitForCompletion(true);
}
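A typical invocation would look something like hadoop jar excel-example.jar com.nikoo28.excel.mapreduce.ExcelDriver <inputPath> <outputPath>, where the jar name is hypothetical and the two arguments map to args[0] and args[1] above. Note that this main ignores the boolean returned by waitForCompletion(true); production drivers usually pass it to System.exit so the shell sees a nonzero status on failure.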