Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data.
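
Before the real-world examples below, here is a minimal, self-contained word-count sketch showing where setOutputKeyClass fits in a typical job setup. The class name SetOutputKeyClassExample and the input/output paths taken from the command line are illustrative assumptions, not part of the sources listed under Usage. The declared output key class must match what the reducer emits; unless setMapOutputKeyClass is called separately, it also describes the intermediate map output key.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputKeyClassExample {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputKeyClass example");
        job.setJarByClass(SetOutputKeyClassExample.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        // Declare the key/value classes of the job's final output. They must
        // match what the reducer writes; because setMapOutputKeyClass and
        // setMapOutputValueClass are not called here, they also describe the
        // intermediate map output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Hypothetical HDFS paths supplied on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}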

Usage

From source file: com.mongodb.hadoop.examples.snmp.SnmpStatistic_MapReduceChain.java

License: Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    //******************This is the first job.******************/
    final Configuration firstConf = new Configuration();
    MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp");
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table);
    final Job firstJob = new Job(firstConf, "snmp analysis " + output_table);
    firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    firstJob.setMapperClass(MapHostUploadEachAPEachDay.class);
    firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(LongWritable.class);
    firstJob.setInputFormatClass(MongoInputFormat.class);
    firstJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = firstJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }

    //*****************This is the second job.********************/
    final Configuration secondConf = new Configuration();
    MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." + output_table);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table2 = null;
    if (use_chunks) {
        if (use_shards)
            output_table2 = "second_snmp_with_shards_and_chunks";
        else
            output_table2 = "second_snmp_with_chunks";
    } else {
        if (use_shards)
            output_table2 = "second_snmp_with_shards";
        else
            output_table2 = "second_snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2);
    final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2);
    secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    secondJob.setMapperClass(MapHostUploadEachDay.class);
    secondJob.setReducerClass(ReduceHostUploadEachDay.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(LongWritable.class);
    secondJob.setInputFormatClass(MongoInputFormat.class);
    secondJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result2 = secondJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result2);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file: com.mongodb.hadoop.examples.SnmpStatistic_MapReduceChain.java

License: Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    //******************This is the first job.******************/
    final Configuration firstConf = new Configuration();
    MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp");
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table);
    final Job firstJob = new Job(firstConf, "snmp analysis " + output_table);
    firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    firstJob.setMapperClass(MapHostUploadEachAPEachDay.class);
    firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(LongWritable.class);
    firstJob.setInputFormatClass(MongoInputFormat.class);
    firstJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = firstJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }

    //*****************This is the second job.********************/       
    final Configuration secondConf = new Configuration();
    MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." + output_table);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table2 = null;
    if (use_chunks) {
        if (use_shards)
            output_table2 = "second_snmp_with_shards_and_chunks";
        else
            output_table2 = "second_snmp_with_chunks";
    } else {
        if (use_shards)
            output_table2 = "second_snmp_with_shards";
        else
            output_table2 = "second_snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2);
    final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2);
    secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    secondJob.setMapperClass(MapHostUploadEachDay.class);
    secondJob.setReducerClass(ReduceHostUploadEachDay.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(LongWritable.class);
    secondJob.setInputFormatClass(MongoInputFormat.class);
    secondJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result2 = secondJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result2);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file: com.mongodb.hadoop.examples.ufos.UfoSightings.java

License: Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();

    final Job job = new Job(conf, "ufo-sightings");

    job.setMapperClass(UfoSightingsMapper.class);
    job.setReducerClass(UfoSightingsReducer.class);
    job.setOutputFormatClass(com.mongodb.hadoop.MongoOutputFormat.class);
    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.IntWritable.class);
    job.setInputFormatClass(com.mongodb.hadoop.MongoInputFormat.class);

    final boolean verbose = true;
    try {
        if (BACKGROUND) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file: com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    boolean useQuery = false;
    for (int i = 0; i < args.length; i++) {
        final String argi = args[i];
        if (argi.equals("--use-query"))
            useQuery = true;
        else {
            throw new IllegalArgumentException(argi);
        }
    }

    if (useQuery) {
        //NOTE: must do this BEFORE Job is created
        final MongoConfig mongo_conf = new MongoConfig(conf);
        com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
        query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 })));
        System.out.println(" --- setting query on num");
        mongo_conf.setQuery(query);
        System.out.println(" --- query is: " + mongo_conf.getQuery());
    }

    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();

    final Job job = new Job(conf, "word count " + outputCollectionName);

    job.setJarByClass(WordCountSplitTest.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out
            .println(" ----------------------- running test " + outputCollectionName + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.err.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
        return 1;
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(outputUri);
    com.mongodb.DB db = m.getDB(outputUri.getDatabase());
    com.mongodb.DBCollection coll = db.getCollection(outputCollectionName);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of \'the\'");
    else
        System.out.println("'the' count: " + cur.next());

    return 0; //is the return value supposed to be the program exit code?

    //        if (! result)
    //           System.exit(  1 );
}

From source file: com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java

License: Apache License

private final static void test(boolean useShards, boolean useChunks, Boolean slaveok, boolean useQuery)
        throws Exception {
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, useShards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, useChunks);

    if (useQuery) {
        //NOTE: must do this BEFORE Job is created
        final MongoConfig mongo_conf = new MongoConfig(conf);
        com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
        query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 })));
        System.out.println(" --- setting query on num");
        mongo_conf.setQuery(query);
        System.out.println(" --- query is: " + mongo_conf.getQuery());
    }

    String output_table = null;
    if (useChunks) {
        if (useShards)
            output_table = "with_shards_and_chunks";
        else
            output_table = "with_chunks";
    } else {
        if (useShards)
            output_table = "with_shards";
        else
            output_table = "no_splits";
    }
    if (slaveok != null) {
        output_table += "_" + slaveok;
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count " + output_table);

    job.setJarByClass(WordCountSplitTest.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out.println(" ----------------------- running test " + output_table + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(
            new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true"));
    com.mongodb.DB db = m.getDB("test");
    com.mongodb.DBCollection coll = db.getCollection(output_table);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of \'the\'");
    else
        System.out.println("'the' count: " + cur.next());

    //        if (! result)
    //           System.exit(  1 );
}

From source file: com.mongodb.hadoop.examples.wordcount.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {

    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost/test.in");
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/test.out");
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count");

    job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.mongodb.hadoop.util.MongoTool.java

License: Apache License

private int runMapReduceJob(final Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, getJobName());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that the space after -D is important - with no
     * space it will not work, as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MongoConfigUtil.getReducer(conf));

    job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MongoConfigUtil.isJobBackground(conf);
    try {
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file: com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java

License: Apache License

/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final FijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were
        // since this a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your FijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter)
        throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
            .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }
    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}