List of usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
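Before the project-specific examples below, here is a minimal, self-contained sketch of the typical use of getConfiguration(): obtain the job's own Configuration and set properties on it before submission, so that the submitted tasks see those values. The property keys (my.app.table.name, my.app.max.versions) are illustrative placeholders, not constants from Hadoop or from the libraries shown below; the examples below use the older new Job() constructor, while this sketch uses Job.getInstance().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
  public static void main(String[] args) throws IOException {
    // Job.getInstance() copies the supplied Configuration into the job.
    Job job = Job.getInstance(new Configuration(), "example-job");

    // getConfiguration() returns the job's own Configuration; values set here
    // are what the job's tasks will see at runtime.
    Configuration conf = job.getConfiguration();
    conf.set("my.app.table.name", "users");   // illustrative key, not a Hadoop constant
    conf.setInt("my.app.max.versions", 3);    // illustrative key, not a Hadoop constant

    // Reading back from the same object confirms the values are in place.
    System.out.println(conf.get("my.app.table.name"));
    System.out.println(conf.getInt("my.app.max.versions", -1));
  }
}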
From source file: com.moz.fiji.mapreduce.gather.FijiGatherJobBuilder.java
License: Apache License

/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
  // Construct the gatherer instance.
  if (null == mGathererClass) {
    throw new JobConfigurationException("Must specify a gatherer.");
  }
  final Configuration conf = job.getConfiguration();

  // Serialize the gatherer class name into the job configuration.
  conf.setClass(FijiConfKeys.FIJI_GATHERER_CLASS, mGathererClass, FijiGatherer.class);

  if ((getJobOutput() instanceof HFileMapReduceJobOutput) && (null == mReducerClass)) {
    mReducerClass = IdentityReducer.class;
  }

  final StringBuilder name = new StringBuilder("Fiji gather: " + mGathererClass.getSimpleName());
  if (null != mReducerClass) {
    name.append(" / " + mReducerClass.getSimpleName());
  }
  job.setJobName(name.toString());

  mGatherer = ReflectionUtils.newInstance(mGathererClass, conf);
  mMapper.setConf(conf);
  mDataRequest = mGatherer.getDataRequest();

  // Construct the combiner instance (if specified).
  if (null != mCombinerClass) {
    mCombiner = ReflectionUtils.newInstance(mCombinerClass, conf);
  }

  // Construct the reducer instance (if specified).
  if (null != mReducerClass) {
    mReducer = ReflectionUtils.newInstance(mReducerClass, conf);
  }

  // Configure the table input job (requires mGatherer, mMapper and mReducer to be set):
  super.configureJob(job);

  // Some validation:
  if (getJobOutput() instanceof HFileMapReduceJobOutput) {
    if (mReducer instanceof IdentityReducer) {
      Preconditions.checkState(mGatherer.getOutputKeyClass() == HFileKeyValue.class,
          String.format("Gatherer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
              mGathererClass.getName(), mGatherer.getOutputKeyClass().getName()));
      Preconditions.checkState(mGatherer.getOutputValueClass() == NullWritable.class,
          String.format("Gatherer '%s' writing HFiles must output NullWritable values, but got '%s'",
              mGathererClass.getName(), mGatherer.getOutputValueClass().getName()));
    }
    Preconditions.checkState(mReducer.getOutputKeyClass() == HFileKeyValue.class,
        String.format("Reducer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
            mReducerClass.getName(), mReducer.getOutputKeyClass().getName()));
    Preconditions.checkState(mReducer.getOutputValueClass() == NullWritable.class,
        String.format("Reducer '%s' writing HFiles must output NullWritable values, but got '%s'",
            mReducerClass.getName(), mReducer.getOutputValueClass().getName()));
  }
}
From source file: com.moz.fiji.mapreduce.input.HTableMapReduceJobInput.java
License: Apache License

/** {@inheritDoc} */
@Override
public void configure(Job job) throws IOException {
  // Configure the input format class.
  super.configure(job);

  // Configure the input HTable name.
  job.getConfiguration().set(TableInputFormat.INPUT_TABLE, mTableName);
}
From source file: com.moz.fiji.mapreduce.input.TestFijiTableMapReduceJobInput.java
License: Apache License

@Test
public void testConfigure() throws IOException {
  final Job job = new Job();

  // Request the latest 3 versions of column 'info:email':
  FijiDataRequestBuilder builder = FijiDataRequest.builder();
  builder.newColumnsDef().withMaxVersions(3).add("info", "email");
  FijiDataRequest dataRequest = builder.build();

  // Read from 'here' to 'there':
  final EntityId startRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("here"));
  final EntityId limitRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("there"));
  final FijiRowFilter filter = new StripValueRowFilter();
  final FijiTableMapReduceJobInput.RowOptions rowOptions =
      FijiTableMapReduceJobInput.RowOptions.create(startRow, limitRow, filter);
  final MapReduceJobInput fijiTableJobInput =
      new FijiTableMapReduceJobInput(mTable.getURI(), dataRequest, rowOptions);
  fijiTableJobInput.configure(job);

  // Check that the job was configured correctly.
  final Configuration conf = job.getConfiguration();
  assertEquals(mTable.getURI(), FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build());

  final FijiDataRequest decoded = (FijiDataRequest) SerializationUtils
      .deserialize(Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_INPUT_DATA_REQUEST)));
  assertEquals(dataRequest, decoded);

  final String confStartRow = Base64.encodeBase64String(startRow.getHBaseRowKey());
  final String confLimitRow = Base64.encodeBase64String(limitRow.getHBaseRowKey());
  assertEquals(confStartRow, conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
  assertEquals(confLimitRow, conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
  assertEquals(filter.toJson().toString(), conf.get(FijiConfKeys.FIJI_ROW_FILTER));
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
    Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey,
    FijiRowFilter filter) throws Exception {
  final Job job = new Job(createConfiguration());
  final Configuration conf = job.getConfiguration();

  // Get settings for test.
  final FijiDataRequest request = FijiDataRequest.builder()
      .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

  job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

  // Setup the InputFormat.
  FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
  job.setInputFormatClass(HBaseFijiTableInputFormat.class);

  // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
  final List<Path> jarFiles = Lists.newArrayList();
  final FileSystem fs = FileSystem.getLocal(conf);
  for (String cpEntry : System.getProperty("java.class.path").split(":")) {
    if (cpEntry.endsWith(".jar")) {
      jarFiles.add(fs.makeQualified(new Path(cpEntry)));
    }
  }
  DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

  // Create a test job.
  job.setJobName(jobName);

  // Setup the OutputFormat.
  TextOutputFormat.setOutputPath(job, outputFile.getParent());
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Set the mapper class.
  if (null != mapperClass) {
    job.setMapperClass(mapperClass);
  }

  // Set the reducer class.
  if (null != reducerClass) {
    job.setReducerClass(reducerClass);
  }

  return job;
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapJob", outputFile, TestMapper.class,
      null, // reducer class
      null, // start key
      null, // limit key
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball",
      "gmail.com\tJohn Doe", "usermail.example.com\tChristophe Bisciglia",
      "usermail.example.com\tKiyan Ahmadizadeh", "gmail.com\tJane Doe",
      "usermail.example.com\tGarrett Wu");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
  final Path outputFile = createOutputFile();

  // Set the same entity IDs for start and limit, and we should get just the start row
  final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
  final byte[] endRowKey = startEntityId.getHBaseRowKey();
  final EntityId rawLimitEntityId = HBaseEntityId
      .fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

  // Create a test job.
  final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class,
      null, // reducer class
      startEntityId,
      rawLimitEntityId,
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
  final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
      new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class,
      null, // reducer class
      null, // start key
      null, // limit key
      filter);

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
  final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License

/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
  final Path outputFile = createOutputFile();

  // Create a test job.
  final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class,
      null, // start key
      null, // limit key
      null); // filter

  // Run the job.
  assertTrue("Hadoop job failed", job.waitForCompletion(true));

  // Check to make sure output exists.
  final FileSystem fs = FileSystem.get(job.getConfiguration());
  assertTrue(fs.exists(outputFile.getParent()));

  // Verify that the output matches what's expected.
  final FSDataInputStream in = fs.open(outputFile);
  final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));

  final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
  for (String line : output) {
    final String[] keyValue = line.split("\t");
    final String emailDomain = keyValue[0];
    final Set<String> names = Sets.newHashSet(keyValue[1].split(","));
    builder.put(emailDomain, names);
  }
  final Map<String, Set<String>> actual = builder.build();
  final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
      .put("usermail.example.com", Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia",
          "Kiyan Ahmadizadeh", "Garrett Wu"))
      .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe"))
      .build();
  assertEquals("Result of job wasn't what was expected", expected, actual);

  // Clean up.
  fs.delete(outputFile.getParent(), true);
  IOUtils.closeQuietly(in);
  // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
  // causes it to close other thread's filesystem objects. For more information
  // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file: com.moz.fiji.mapreduce.lib.reduce.TestAvroReducer.java
License: Apache License

@Test
public void testMapReduce() throws IOException {
  MyAvroReducer reducer = new MyAvroReducer();

  // Configure a job.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework.
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Run the reducer.
  ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver =
      new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  driver.setReducer(reducer);
  driver.withConfiguration(job.getConfiguration());
  driver.withInput(new Text("foo"),
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
  List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
  assertEquals(1, output.size());
  assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
From source file: com.moz.fiji.mapreduce.lib.reduce.TestMergeNodeReducer.java
License: Apache License

@Test
public void testMergeNodeReducer() throws IOException {
  MergeNodeReducer<Text> reducer = new MergeNodeReducer<Text>();
  ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>> driver =
      new ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>>();
  driver.setReducer(reducer);

  // Configure avro serialization.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // in the real hadoop mapreduce framework.
  AvroJob.setMapOutputValueSchema(job, reducer.getAvroValueWriterSchema());
  AvroJob.setOutputValueSchema(job, reducer.getAvroValueWriterSchema());
  driver.withConfiguration(job.getConfiguration());

  // Here's what our node graph looks like.
  //
  // X: A/1.0 ----b/2.0---> C/3.0
  // Y: A/1.0 ----b/2.0---> C/6.0
  // Z: A/1.0 ----f/1.0---> C/7.0
  // W: A/1.0 ----d/4.0---> E/5.0
  //
  driver.withInputKey(new Text("A"));
  Node node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(3.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(6.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("f").setWeight(1.0)
          .setTarget(new NodeBuilder().setLabel("C").setWeight(7.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));
  node = new NodeBuilder().setLabel("A").setWeight(1.0)
      .addEdge(new EdgeBuilder().setLabel("d").setWeight(4.0)
          .setTarget(new NodeBuilder().setLabel("E").setWeight(5.0).build()).build())
      .build();
  driver.withInputValue(new AvroValue<Node>(node));

  //
  // A/4.0 ----b/4.0---> C/9.0
  //       \---d/4.0---> E/5.0
  //       \---f/1.0---> C/7.0
  //
  List<Pair<Text, AvroValue<Node>>> actual = driver.run();
  assertEquals(1, actual.size());
  assertEquals("A", actual.get(0).getFirst().toString());
  Node actualNode = actual.get(0).getSecond().datum();
  assertNotNull(actualNode);
  assertEquals("A", actualNode.getLabel().toString());
  assertEquals(4.0, actualNode.getWeight(), 1e-8);
  assertEquals(3, actualNode.getEdges().size());
  assertEquals("b", actualNode.getEdges().get(0).getLabel().toString());
  assertEquals(4.0, actualNode.getEdges().get(0).getWeight(), 1e-8);
  assertEquals("C", actualNode.getEdges().get(0).getTarget().getLabel().toString());
  assertEquals(9.0, actualNode.getEdges().get(0).getTarget().getWeight(), 1e-8);
  assertEquals("d", actualNode.getEdges().get(1).getLabel().toString());
  assertEquals(4.0, actualNode.getEdges().get(1).getWeight(), 1e-8);
  assertEquals("E", actualNode.getEdges().get(1).getTarget().getLabel().toString());
  assertEquals(5.0, actualNode.getEdges().get(1).getTarget().getWeight(), 1e-8);
  assertEquals("f", actualNode.getEdges().get(2).getLabel().toString());
  assertEquals(1.0, actualNode.getEdges().get(2).getWeight(), 1e-8);
  assertEquals("C", actualNode.getEdges().get(2).getTarget().getLabel().toString());
  assertEquals(7.0, actualNode.getEdges().get(2).getTarget().getWeight(), 1e-8);
}