List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
From source file:com.moz.fiji.mapreduce.lib.reduce.TestNodeReducer.java
License:Apache License
@Test public void testMapReduce() throws IOException { MyNodeReducer reducer = new MyNodeReducer(); // Configure a job. Job job = new Job(); // We've got to do a little hacking here since mrunit doesn't run exactly like // the real hadoop mapreduce framework. AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$); AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema()); AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$); ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>(); driver.setReducer(reducer);//from w w w. jav a 2 s .c o m driver.withConfiguration(job.getConfiguration()); driver.withInput(new Text("foo"), Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()))); List<Pair<AvroKey<Node>, NullWritable>> output = driver.run(); assertEquals(1, output.size()); assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString()); }
From source file:com.moz.fiji.mapreduce.output.DirectFijiTableMapReduceJobOutput.java
License:Apache License
/** {@inheritDoc} */ @Override/*from w ww. j a va2 s . c om*/ public void configure(Job job) throws IOException { // sets Hadoop output format, Fiji output table and # of reducers: super.configure(job); final Configuration conf = job.getConfiguration(); // Fiji table context: conf.setClass(FijiConfKeys.FIJI_TABLE_CONTEXT_CLASS, DirectFijiTableWriterContext.class, FijiTableContext.class); // Since there's no "commit" operation for an entire map task writing to a // Fiji table, do not use speculative execution when writing directly to a Fiji table. conf.setBoolean("mapred.map.tasks.speculative.execution", false); }
From source file:com.moz.fiji.mapreduce.output.FijiTableMapReduceJobOutput.java
License:Apache License
/** {@inheritDoc} */ @Override/* www . java2s. c om*/ public void configure(Job job) throws IOException { // sets Hadoop output format according to getOutputFormatClass() super.configure(job); final Configuration conf = job.getConfiguration(); conf.set(FijiConfKeys.FIJI_OUTPUT_TABLE_URI, mTableURI.toString()); job.setNumReduceTasks(getNumReduceTasks()); // Adds HBase dependency jars to the distributed cache so they appear on the task classpath: GenericTableMapReduceUtil.addAllDependencyJars(job); }
From source file:com.moz.fiji.mapreduce.output.framework.HFileReducerMapReduceJobOutput.java
License:Apache License
/** {@inheritDoc} */ @Override/*ww w.j a v a2 s .co m*/ public void configure(Job job) throws IOException { super.configure(job); // sets the Hadoop output format final Configuration conf = job.getConfiguration(); conf.set(FijiConfKeys.FIJI_OUTPUT_TABLE_URI, mJobOutput.getOutputTableURI().toString()); // Fiji table context: conf.setClass(FijiConfKeys.FIJI_TABLE_CONTEXT_CLASS, HFileWriterContext.class, FijiTableContext.class); // Set the output path. FileOutputFormat.setOutputPath(job, mJobOutput.getPath()); job.setNumReduceTasks(mJobOutput.getNumReduceTasks()); }
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/** {@inheritDoc} */ @Override/*from www . ja va 2s. c om*/ public void configure(Job job) throws IOException { // sets Hadoop output format, Fiji output table and # of reducers: super.configure(job); final Configuration conf = job.getConfiguration(); // Fiji table context: conf.setClass(FijiConfKeys.FIJI_TABLE_CONTEXT_CLASS, HFileWriterContext.class, FijiTableContext.class); // Set the output path. FileOutputFormat.setOutputPath(job, mPath); // Configure the total order partitioner so generated HFile shards are contiguous and sorted. configurePartitioner(job, makeTableKeySplit(getOutputTableURI(), getNumReduceTasks(), conf)); // Note: the HFile job output requires the reducer of the MapReduce job to be IdentityReducer. // This is enforced externally. }
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/** * Configures the partitioner for generating HFiles. * * <p>Each generated HFile should fit within a region of of the target table. * Additionally, it's optimal to have only one HFile to load into each region, since a * read from that region will require reading from each HFile under management (until * compaction happens and merges them all back into one HFile).</p> * * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the * records output from the Mapper based on their rank in a total ordering of the * keys. The <code>startKeys</code> argument should contain a list of the first key in * each of those partitions.</p>//from w w w . ja va 2s . co m * * @param job The job to configure. * @param startKeys A list of keys that will mark the boundaries between the partitions * for the sorted map output records. * @throws IOException If there is an error. */ public static void configurePartitioner(Job job, List<HFileKeyValue> startKeys) throws IOException { FijiMRPlatformBridge.get().setTotalOrderPartitionerClass(job); LOG.info("Configuring " + startKeys.size() + " reduce partitions."); job.setNumReduceTasks(startKeys.size()); // Write the file that the TotalOrderPartitioner reads to determine where to partition records. Path partitionFilePath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis()); LOG.info("Writing partition information to " + partitionFilePath); final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration()); partitionFilePath = partitionFilePath.makeQualified(fs); writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys); // Add it to the distributed cache. try { final URI cacheUri = new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH); DistributedCache.addCacheFile(cacheUri, job.getConfiguration()); } catch (URISyntaxException e) { throw new IOException(e); } DistributedCache.createSymlink(job.getConfiguration()); }
From source file:com.moz.fiji.mapreduce.output.TestFijiTableMapReduceJobOutput.java
License:Apache License
/**
 * Checks that configuring a job with a DirectFijiTableMapReduceJobOutput turns off
 * speculative execution of map tasks.
 */
@Test
public void testSpecExDisabled() throws Exception {
  // Create a table for the job output to point at.
  final Fiji fiji = getFiji();
  fiji.createTable(
      "table",
      FijiTableLayout.createUpdatedLayout(
          FijiTableLayouts.getLayout(FijiTableLayouts.SIMPLE), null));
  final FijiURI tableURI =
      FijiURI.newBuilder(fiji.getURI()).withTableName("table").build();

  final Job job = new Job();
  new DirectFijiTableMapReduceJobOutput(tableURI).configure(job);

  // The lookup defaults to true, so the assertion only passes when configure() has
  // explicitly stored false under the key.
  assertFalse(
      job.getConfiguration().getBoolean("mapred.map.tasks.speculative.execution", true));
}
From source file:com.moz.fiji.mapreduce.pivot.FijiPivotJobBuilder.java
License:Apache License
/** {@inheritDoc} */ @Override/*from w ww . j a v a 2s .co m*/ protected void configureJob(Job job) throws IOException { final Configuration conf = job.getConfiguration(); if (null == mPivoterClass) { throw new JobConfigurationException("Must specify a FijiPivoter class."); } // Serialize the pivoter class name into the job configuration. conf.setClass(FijiConfKeys.FIJI_PIVOTER_CLASS, mPivoterClass, FijiPivoter.class); // Producers should output to HFiles. mMapper = new PivoterMapper(); mReducer = new IdentityReducer<Object, Object>(); job.setJobName("FijiPivoter: " + mPivoterClass.getSimpleName()); mPivoter = ReflectionUtils.newInstance(mPivoterClass, job.getConfiguration()); mDataRequest = mPivoter.getDataRequest(); // Configure the table input job. super.configureJob(job); }
From source file:com.moz.fiji.mapreduce.platform.CDH5FijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public void setUserClassesTakesPrecedence(Job job, boolean value) {
  // Writes the flag into the job's configuration under
  // JobContext.MAPREDUCE_TASK_CLASSPATH_PRECEDENCE.
  job.getConfiguration()
      .setBoolean(JobContext.MAPREDUCE_TASK_CLASSPATH_PRECEDENCE, value);
}