Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce Job constructor, Job().

Prototype

@Deprecated
public Job() throws IOException 
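The no-argument constructor is deprecated in recent Hadoop releases; the static factory Job.getInstance(), optionally taking a Configuration and a job name, is the documented replacement. A minimal sketch of the preferred form, for comparison with the examples below (the class name JobFactory and the job name "example" are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobFactory {
    /** Creates a Job without using the deprecated no-argument constructor. */
    public static Job createJob() throws IOException {
        Configuration conf = new Configuration();
        // Job.getInstance(...) makes a copy of the passed Configuration.
        return Job.getInstance(conf, "example");
    }
}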

Usage

From source file:com.linkedin.cubert.utils.CubertMD.java

License:Open Source License

public static HashMap<String, String> readMetafile(String metaFilePath) throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);

    HashMap<String, String> result = new HashMap<String, String>();
    FSDataInputStream inStream;
    try {
        inStream = fs.open(new Path(metaFilePath + "/.meta"));

        BufferedReader breader = new BufferedReader(new InputStreamReader(inStream));
        String line;
        while ((line = breader.readLine()) != null) {
            String[] splits = line.split("\\s+");
            result.put(splits[0], splits[1]);
        }
    } catch (IOException e) {
        // Metafile missing or unreadable: return whatever entries were read so far.
        return result;
    }
    return result;
}

From source file:com.moz.fiji.mapreduce.input.TestFijiTableMapReduceJobInput.java

License:Apache License

@Test
public void testConfigure() throws IOException {
    final Job job = new Job();

    // Request the latest 3 versions of column 'info:email':
    FijiDataRequestBuilder builder = FijiDataRequest.builder();
    builder.newColumnsDef().withMaxVersions(3).add("info", "email");
    FijiDataRequest dataRequest = builder.build();

    // Read from 'here' to 'there':
    final EntityId startRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("here"));
    final EntityId limitRow = HBaseEntityId.fromHBaseRowKey(Bytes.toBytes("there"));
    final FijiRowFilter filter = new StripValueRowFilter();
    final FijiTableMapReduceJobInput.RowOptions rowOptions = FijiTableMapReduceJobInput.RowOptions
            .create(startRow, limitRow, filter);
    final MapReduceJobInput fijiTableJobInput = new FijiTableMapReduceJobInput(mTable.getURI(), dataRequest,
            rowOptions);
    fijiTableJobInput.configure(job);

    // Check that the job was configured correctly.
    final Configuration conf = job.getConfiguration();
    assertEquals(mTable.getURI(), FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build());

    final FijiDataRequest decoded = (FijiDataRequest) SerializationUtils
            .deserialize(Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_INPUT_DATA_REQUEST)));
    assertEquals(dataRequest, decoded);

    final String confStartRow = Base64.encodeBase64String(startRow.getHBaseRowKey());
    final String confLimitRow = Base64.encodeBase64String(limitRow.getHBaseRowKey());
    assertEquals(confStartRow, conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
    assertEquals(confLimitRow, conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));

    assertEquals(filter.toJson().toString(), conf.get(FijiConfKeys.FIJI_ROW_FILTER));
}

From source file:com.moz.fiji.mapreduce.lib.reduce.TestAvroReducer.java

License:Apache License

@Test
public void testMapReduce() throws IOException {
    MyAvroReducer reducer = new MyAvroReducer();

    // Configure a job.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // the real hadoop mapreduce framework.
    AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
    AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
    AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

    // Run the reducer.
    ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
    driver.setReducer(reducer);
    driver.withConfiguration(job.getConfiguration());
    driver.withInput(new Text("foo"),
            Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
    List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
    assertEquals(1, output.size());
    assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}

From source file:com.moz.fiji.mapreduce.lib.reduce.TestMergeNodeReducer.java

License:Apache License

@Test
public void testMergeNodeReducer() throws IOException {
    MergeNodeReducer<Text> reducer = new MergeNodeReducer<Text>();
    ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>> driver = new ReduceDriver<Text, AvroValue<Node>, Text, AvroValue<Node>>();
    driver.setReducer(reducer);

    // Configure avro serialization.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // the real hadoop mapreduce framework.
    AvroJob.setMapOutputValueSchema(job, reducer.getAvroValueWriterSchema());
    AvroJob.setOutputValueSchema(job, reducer.getAvroValueWriterSchema());
    driver.withConfiguration(job.getConfiguration());

    // Here's what our node graph looks like.
    //
    //  X: A/1.0 ----b/2.0---> C/3.0
    //  Y: A/1.0 ----b/2.0---> C/6.0
    //  Z: A/1.0 ----f/1.0---> C/7.0
    //  W: A/1.0 ----d/4.0---> E/5.0
    //
    driver.withInputKey(new Text("A"));

    Node node = new NodeBuilder().setLabel("A").setWeight(1.0).addEdge(new EdgeBuilder().setLabel("b")
            .setWeight(2.0).setTarget(new NodeBuilder().setLabel("C").setWeight(3.0).build()).build()).build();
    driver.withInputValue(new AvroValue<Node>(node));

    node = new NodeBuilder().setLabel("A").setWeight(1.0).addEdge(new EdgeBuilder().setLabel("b").setWeight(2.0)
            .setTarget(new NodeBuilder().setLabel("C").setWeight(6.0).build()).build()).build();
    driver.withInputValue(new AvroValue<Node>(node));

    node = new NodeBuilder().setLabel("A").setWeight(1.0).addEdge(new EdgeBuilder().setLabel("f").setWeight(1.0)
            .setTarget(new NodeBuilder().setLabel("C").setWeight(7.0).build()).build()).build();
    driver.withInputValue(new AvroValue<Node>(node));

    node = new NodeBuilder().setLabel("A").setWeight(1.0).addEdge(new EdgeBuilder().setLabel("d").setWeight(4.0)
            .setTarget(new NodeBuilder().setLabel("E").setWeight(5.0).build()).build()).build();
    driver.withInputValue(new AvroValue<Node>(node));

    // Expected merged output:
    // A/4.0 ----b/4.0---> C/9.0
    //        \---d/4.0---> E/5.0
    //        \---f/1.0---> C/7.0
    //
    List<Pair<Text, AvroValue<Node>>> actual = driver.run();
    assertEquals(1, actual.size());
    assertEquals("A", actual.get(0).getFirst().toString());
    Node actualNode = actual.get(0).getSecond().datum();
    assertNotNull(actualNode);
    assertEquals("A", actualNode.getLabel().toString());
    assertEquals(4.0, actualNode.getWeight(), 1e-8);
    assertEquals(3, actualNode.getEdges().size());
    assertEquals("b", actualNode.getEdges().get(0).getLabel().toString());
    assertEquals(4.0, actualNode.getEdges().get(0).getWeight(), 1e-8);
    assertEquals("C", actualNode.getEdges().get(0).getTarget().getLabel().toString());
    assertEquals(9.0, actualNode.getEdges().get(0).getTarget().getWeight(), 1e-8);
    assertEquals("d", actualNode.getEdges().get(1).getLabel().toString());
    assertEquals(4.0, actualNode.getEdges().get(1).getWeight(), 1e-8);
    assertEquals("E", actualNode.getEdges().get(1).getTarget().getLabel().toString());
    assertEquals(5.0, actualNode.getEdges().get(1).getTarget().getWeight(), 1e-8);
    assertEquals("f", actualNode.getEdges().get(2).getLabel().toString());
    assertEquals(1.0, actualNode.getEdges().get(2).getWeight(), 1e-8);
    assertEquals("C", actualNode.getEdges().get(2).getTarget().getLabel().toString());
    assertEquals(7.0, actualNode.getEdges().get(2).getTarget().getWeight(), 1e-8);
}

From source file:com.moz.fiji.mapreduce.lib.reduce.TestNodeReducer.java

License:Apache License

@Test
public void testMapReduce() throws IOException {
    MyNodeReducer reducer = new MyNodeReducer();

    // Configure a job.
    Job job = new Job();
    // We've got to do a little hacking here since mrunit doesn't run exactly like
    // the real hadoop mapreduce framework.
    AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
    AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
    AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

    ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
    driver.setReducer(reducer);
    driver.withConfiguration(job.getConfiguration());
    driver.withInput(new Text("foo"),
            Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
    List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
    assertEquals(1, output.size());
    assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}

From source file:com.moz.fiji.mapreduce.output.TestFijiTableMapReduceJobOutput.java

License:Apache License

/** Test that mapper speculative execution is disabled for FijiTableMapReduceJobOutput. */
@Test
public void testSpecExDisabled() throws Exception {
    final Fiji fiji = getFiji();
    final FijiTableLayout layout = FijiTableLayout
            .createUpdatedLayout(FijiTableLayouts.getLayout(FijiTableLayouts.SIMPLE), null);
    fiji.createTable("table", layout);
    FijiURI tableURI = FijiURI.newBuilder(fiji.getURI()).withTableName("table").build();

    final Job job = new Job();
    new DirectFijiTableMapReduceJobOutput(tableURI).configure(job);

    final Configuration conf = job.getConfiguration();
    boolean isMapSpecExEnabled = conf.getBoolean("mapred.map.tasks.speculative.execution", true);
    assertFalse(isMapSpecExEnabled);
}

From source file:com.moz.fiji.mapreduce.output.TestFileMapReduceJobOutput.java

License:Apache License

@Test
public void testConfigure() throws ClassNotFoundException, IOException {
    final Path filePath = new Path("foo/bar");
    final int numSplits = 42;
    final Class<? extends OutputFormat> outputFormatClass = TextOutputFormat.class;
    FileMapReduceJobOutput jobOutput = new ConcreteFileMapReduceJobOutput(filePath, numSplits,
            outputFormatClass);

    Job job = new Job();
    jobOutput.configure(job);

    // The output format class should be set in the job configuration.
    assertEquals(outputFormatClass, job.getOutputFormatClass());
    // The file output path should be set in the job configuration.
    assert (FileOutputFormat.getOutputPath(job).toString().endsWith(filePath.toString()));
    // The number of reduce tasks should be set to the number of splits.
    assertEquals(numSplits, job.getNumReduceTasks());
}

From source file:com.moz.fiji.mapreduce.TestDistributedCacheJars.java

License:Apache License

/**
 * Pre: Requires mTempDir to be set and filled (only) with .jar files.
 * These don't need to actually be jars.
 *
 * Creates a new Job and checks that jars de-dupe.
 *
 * @throws IOException if configuration can not be created.
 */
@Test
public void testJarsDeDupe() throws IOException {
    final File tempDir = getLocalTempDir();

    // Jar list should de-dupe to {"myjar_a", "myjar_b", "myjar_0", "myjar_1"}
    Set<String> dedupedJarNames = new HashSet<String>(4);
    dedupedJarNames.add("myjar_a.jar");
    dedupedJarNames.add("myjar_b.jar");
    dedupedJarNames.add("myjar_0.jar");
    dedupedJarNames.add("myjar_1.jar");

    Job job = new Job();

    List<String> someJars = new ArrayList<String>();
    // Some unique jar names.
    someJars.add("/somepath/myjar_a.jar");
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/myjar_0.jar");

    // Duplicate jars.
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/yet/another/path/myjar_b.jar");

    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(someJars, ","));

    // Now add some duplicate jars from mTempDir.
    assertEquals(0, tempDir.list().length);
    createTestJars(tempDir, "myjar_0.jar", "myjar_1.jar");
    assertEquals(2, tempDir.list().length);
    DistributedCacheJars.addJarsToDistributedCache(job, tempDir);

    // Confirm each jar appears in de-dupe list exactly once.
    String listedJars = job.getConfiguration().get(CONF_TMPJARS);
    String[] jars = listedJars.split(",");
    for (String jar : jars) {
        // Check that path terminates in an expected jar.
        Path p = new Path(jar);
        assertTrue(dedupedJarNames.contains(p.getName()));
        dedupedJarNames.remove(p.getName());
    }
    assertEquals(0, dedupedJarNames.size());
}

From source file:com.moz.fiji.schema.mapreduce.TestDistributedCacheJars.java

License:Apache License

/**
 * Pre: Requires mTempDir to be set and filled (only) with .jar files.
 * These don't need to actually be jars.
 *
 * Creates a new Job and checks that jars de-dupe.
 *
 * @throws IOException if configuration can not be created.
 */
@Test
public void testJarsDeDupe() throws IOException {
    // Jar list should de-dupe to {"myjar_a", "myjar_b", "myjar_0", "myjar_1"}
    Set<String> dedupedJarNames = new HashSet<String>(4);
    dedupedJarNames.add("myjar_a.jar");
    dedupedJarNames.add("myjar_b.jar");
    dedupedJarNames.add("myjar_0.jar");
    dedupedJarNames.add("myjar_1.jar");

    Job job = new Job();

    List<String> someJars = new ArrayList<String>();
    // Some unique jar names.
    someJars.add("/somepath/myjar_a.jar");
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/myjar_0.jar");

    // Duplicate jars.
    someJars.add("/another/path/myjar_b.jar");
    someJars.add("/yet/another/path/myjar_b.jar");

    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(someJars, ","));

    // Now add some duplicate jars from mTempDir.
    assertEquals(0, mTempDir.getRoot().list().length);
    createTestJars("myjar_0.jar", "myjar_1.jar");
    assertEquals(2, mTempDir.getRoot().list().length);
    DistributedCacheJars.addJarsToDistributedCache(job, mTempDir.getRoot());

    // Confirm each jar appears in de-dupe list exactly once.
    String listedJars = job.getConfiguration().get(CONF_TMPJARS);
    String[] jars = listedJars.split(",");
    for (String jar : jars) {
        // Check that path terminates in an expected jar.
        Path p = new Path(jar);
        assertTrue(dedupedJarNames.contains(p.getName()));
        dedupedJarNames.remove(p.getName());
    }
    assertEquals(0, dedupedJarNames.size());
}

From source file:com.nikoo28.excel.mapreduce.ExcelDriver.java

License:Apache License

/**
 * Main entry point for the example.
 *
 * @param args arguments
 * @throws Exception when something goes wrong
 */
public static void main(String[] args) throws Exception {
    logger.info("Driver started");

    Job job = new Job();
    job.setJarByClass(ExcelDriver.class);
    job.setJobName("Excel Record Reader");

    job.setMapperClass(ExcelMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(ExcelInputFormat.class);

    job.waitForCompletion(true);
}
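The driver above discards the boolean returned by job.waitForCompletion(true), which indicates whether the job succeeded. A small sketch, not part of the original source, of the usual way to surface that result as the process exit status:

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);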