Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data.
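
Before the real-world examples below, here is a minimal, self-contained word-count sketch showing where setOutputKeyClass fits in a typical job setup. The class name SetOutputKeyClassExample and the input/output paths taken from the command line are illustrative assumptions, not part of the sources listed under Usage. The declared output key class must match what the reducer emits; unless setMapOutputKeyClass is called separately, it also describes the intermediate map output key.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputKeyClassExample {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputKeyClass example");
        job.setJarByClass(SetOutputKeyClassExample.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        // Declare the key/value classes of the job's final output. They must
        // match what the reducer writes; because setMapOutputKeyClass and
        // setMapOutputValueClass are not called here, they also describe the
        // intermediate map output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Hypothetical HDFS paths supplied on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}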

Usage

From source file: com.mongodb.hadoop.examples.snmp.SnmpStatistic_MapReduceChain.java

License: Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    //******************This is the first job.******************/
    final Configuration firstConf = new Configuration();
    MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp");
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table);
    final Job firstJob = new Job(firstConf, "snmp analysis " + output_table);
    firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    firstJob.setMapperClass(MapHostUploadEachAPEachDay.class);
    firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(LongWritable.class);
    firstJob.setInputFormatClass(MongoInputFormat.class);
    firstJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = firstJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }

    //*****************This is the second job.********************/
    final Configuration secondConf = new Configuration();
    MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." + output_table);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table2 = null;
    if (use_chunks) {
        if (use_shards)
            output_table2 = "second_snmp_with_shards_and_chunks";
        else
            output_table2 = "second_snmp_with_chunks";
    } else {
        if (use_shards)
            output_table2 = "second_snmp_with_shards";
        else
            output_table2 = "second_snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2);
    final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2);
    secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    secondJob.setMapperClass(MapHostUploadEachDay.class);
    secondJob.setReducerClass(ReduceHostUploadEachDay.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(LongWritable.class);
    secondJob.setInputFormatClass(MongoInputFormat.class);
    secondJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result2 = secondJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result2);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file: com.mongodb.hadoop.examples.SnmpStatistic_MapReduceChain.java

License: Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    //******************This is the first job.******************/
    final Configuration firstConf = new Configuration();
    MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp");
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmp_with_shards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table);
    final Job firstJob = new Job(firstConf, "snmp analysis " + output_table);
    firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    firstJob.setMapperClass(MapHostUploadEachAPEachDay.class);
    firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class);
    firstJob.setOutputKeyClass(Text.class);
    firstJob.setOutputValueClass(LongWritable.class);
    firstJob.setInputFormatClass(MongoInputFormat.class);
    firstJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = firstJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }

    //*****************This is the second job.********************/       
    final Configuration secondConf = new Configuration();
    MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." + output_table);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table2 = null;
    if (use_chunks) {
        if (use_shards)
            output_table2 = "second_snmp_with_shards_and_chunks";
        else
            output_table2 = "second_snmp_with_chunks";
    } else {
        if (use_shards)
            output_table2 = "second_snmp_with_shards";
        else
            output_table2 = "second_snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2);
    final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2);
    secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class);
    secondJob.setMapperClass(MapHostUploadEachDay.class);
    secondJob.setReducerClass(ReduceHostUploadEachDay.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(LongWritable.class);
    secondJob.setInputFormatClass(MongoInputFormat.class);
    secondJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result2 = secondJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result2);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}

From source file: com.mongodb.hadoop.examples.ufos.UfoSightings.java

License: Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();

    final Job job = new Job(conf, "ufo-sightings");

    job.setMapperClass(UfoSightingsMapper.class);
    job.setReducerClass(UfoSightingsReducer.class);
    job.setOutputFormatClass(com.mongodb.hadoop.MongoOutputFormat.class);
    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.IntWritable.class);
    job.setInputFormatClass(com.mongodb.hadoop.MongoInputFormat.class);

    final boolean verbose = true;
    try {
        if (BACKGROUND) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file: com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    boolean useQuery = false;
    for (int i = 0; i < args.length; i++) {
        final String argi = args[i];
        if (argi.equals("--use-query"))
            useQuery = true;
        else {
            throw new IllegalArgumentException(argi);
        }
    }

    if (useQuery) {
        //NOTE: must do this BEFORE Job is created
        final MongoConfig mongo_conf = new MongoConfig(conf);
        com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
        query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 })));
        System.out.println(" --- setting query on num");
        mongo_conf.setQuery(query);
        System.out.println(" --- query is: " + mongo_conf.getQuery());
    }

    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();

    final Job job = new Job(conf, "word count " + outputCollectionName);

    job.setJarByClass(WordCountSplitTest.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out
            .println(" ----------------------- running test " + outputCollectionName + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.err.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
        return 1;
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(outputUri);
    com.mongodb.DB db = m.getDB(outputUri.getDatabase());
    com.mongodb.DBCollection coll = db.getCollection(outputCollectionName);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of \'the\'");
    else
        System.out.println("'the' count: " + cur.next());

    return 0; //is the return value supposed to be the program exit code?

    //        if (! result)
    //           System.exit(  1 );
}

From source file: com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java

License: Apache License

private final static void test(boolean useShards, boolean useChunks, Boolean slaveok, boolean useQuery)
        throws Exception {
    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines");
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, useShards);
    conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, useChunks);

    if (useQuery) {
        //NOTE: must do this BEFORE Job is created
        final MongoConfig mongo_conf = new MongoConfig(conf);
        com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
        query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 })));
        System.out.println(" --- setting query on num");
        mongo_conf.setQuery(query);
        System.out.println(" --- query is: " + mongo_conf.getQuery());
    }

    String output_table = null;
    if (useChunks) {
        if (useShards)
            output_table = "with_shards_and_chunks";
        else
            output_table = "with_chunks";
    } else {
        if (useShards)
            output_table = "with_shards";
        else
            output_table = "no_splits";
    }
    if (slaveok != null) {
        output_table += "_" + slaveok;
    }
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." + output_table);
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count " + output_table);

    job.setJarByClass(WordCountSplitTest.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    final long start = System.currentTimeMillis();
    System.out.println(" ----------------------- running test " + output_table + " --------------------");
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    final long end = System.currentTimeMillis();
    final float seconds = ((float) (end - start)) / 1000;
    java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);
    System.out.println("finished run in " + nf.format(seconds) + " seconds");

    com.mongodb.Mongo m = new com.mongodb.Mongo(
            new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true"));
    com.mongodb.DB db = m.getDB("test");
    com.mongodb.DBCollection coll = db.getCollection(output_table);
    com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject();
    query.put("_id", "the");
    com.mongodb.DBCursor cur = coll.find(query);
    if (!cur.hasNext())
        System.out.println("FAILURE: could not find count of \'the\'");
    else
        System.out.println("'the' count: " + cur.next());

    //        if (! result)
    //           System.exit(  1 );
}

From source file: com.mongodb.hadoop.examples.wordcount.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {

    final Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost/test.in");
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/test.out");
    System.out.println("Conf: " + conf);

    final Job job = new Job(conf, "word count");

    job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.mongodb.hadoop.util.MongoTool.java

License: Apache License

private int runMapReduceJob(final Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, getJobName());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that the space after -D is important - with no
     * space it will not work, as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MongoConfigUtil.getReducer(conf));

    job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MongoConfigUtil.isJobBackground(conf);
    try {
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file: com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java

License: Apache License

/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final FijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were
        // since this a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your FijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter)
        throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
            .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }
    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}