List of usage examples for org.apache.hadoop.mapreduce.Job#setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:com.mongodb.hadoop.examples.snmp.SnmpStatistic_MapReduceChain.java
License:Apache License
public static void main(String[] args) throws Exception { boolean use_shards = true; boolean use_chunks = false; //******************This is the first job.******************/ final Configuration firstConf = new Configuration(); MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp"); firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards); firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks); String output_table = null;/* w w w . j a v a 2 s . co m*/ if (use_chunks) { if (use_shards) output_table = "snmp_with_shards_and_chunks"; else output_table = "snmp_with_chunks"; } else { if (use_shards) output_table = "snmp_with_shards"; else output_table = "snmp_no_splits"; } MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table); final Job firstJob = new Job(firstConf, "snmp analysis " + output_table); firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class); firstJob.setMapperClass(MapHostUploadEachAPEachDay.class); firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class); firstJob.setOutputKeyClass(Text.class); firstJob.setOutputValueClass(LongWritable.class); firstJob.setInputFormatClass(MongoInputFormat.class); firstJob.setOutputFormatClass(MongoOutputFormat.class); try { boolean result = firstJob.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } //*****************This is the second job.********************/ final Configuration secondConf = new Configuration(); MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." 
+ output_table); secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards); secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks); String output_table2 = null; if (use_chunks) { if (use_shards) output_table2 = "second_snmp_with_shards_and_chunks"; else output_table2 = "second_snmp_with_chunks"; } else { if (use_shards) output_table2 = "second_snmp_with_shards"; else output_table2 = "second_snmp_no_splits"; } MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2); final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2); secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class); secondJob.setMapperClass(MapHostUploadEachDay.class); secondJob.setReducerClass(ReduceHostUploadEachDay.class); secondJob.setOutputKeyClass(Text.class); secondJob.setOutputValueClass(LongWritable.class); secondJob.setInputFormatClass(MongoInputFormat.class); secondJob.setOutputFormatClass(MongoOutputFormat.class); try { boolean result2 = secondJob.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result2); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } }
From source file:com.mongodb.hadoop.examples.SnmpStatistic_MapReduceChain.java
License:Apache License
public static void main(String[] args) throws Exception { boolean use_shards = true; boolean use_chunks = false; //******************This is the first job.******************/ final Configuration firstConf = new Configuration(); MongoConfigUtil.setInputURI(firstConf, "mongodb://localhost:30000/test.snmp"); firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards); firstConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks); String output_table = null;// w w w .j av a 2 s . c o m if (use_chunks) { if (use_shards) output_table = "snmp_with_shards_and_chunks"; else output_table = "snmp_with_chunks"; } else { if (use_shards) output_table = "snmp_with_shards"; else output_table = "snmp_no_splits"; } MongoConfigUtil.setOutputURI(firstConf, "mongodb://localhost:30000/test." + output_table); final Job firstJob = new Job(firstConf, "snmp analysis " + output_table); firstJob.setJarByClass(SnmpStatistic_MapReduceChain.class); firstJob.setMapperClass(MapHostUploadEachAPEachDay.class); firstJob.setReducerClass(ReduceHostUploadEachAPEachDay.class); firstJob.setOutputKeyClass(Text.class); firstJob.setOutputValueClass(LongWritable.class); firstJob.setInputFormatClass(MongoInputFormat.class); firstJob.setOutputFormatClass(MongoOutputFormat.class); try { boolean result = firstJob.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } //*****************This is the second job.********************/ final Configuration secondConf = new Configuration(); MongoConfigUtil.setInputURI(secondConf, "mongodb://localhost:30000/test." 
+ output_table); secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards); secondConf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks); String output_table2 = null; if (use_chunks) { if (use_shards) output_table2 = "second_snmp_with_shards_and_chunks"; else output_table2 = "second_snmp_with_chunks"; } else { if (use_shards) output_table2 = "second_snmp_with_shards"; else output_table2 = "second_snmp_no_splits"; } MongoConfigUtil.setOutputURI(secondConf, "mongodb://localhost:30000/test." + output_table2); final Job secondJob = new Job(secondConf, "snmp analysis " + output_table2); secondJob.setJarByClass(SnmpStatistic_MapReduceChain.class); secondJob.setMapperClass(MapHostUploadEachDay.class); secondJob.setReducerClass(ReduceHostUploadEachDay.class); secondJob.setOutputKeyClass(Text.class); secondJob.setOutputValueClass(LongWritable.class); secondJob.setInputFormatClass(MongoInputFormat.class); secondJob.setOutputFormatClass(MongoOutputFormat.class); try { boolean result2 = secondJob.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result2); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } }
From source file:com.mongodb.hadoop.examples.ufos.UfoSightings.java
License:Apache License
/**
 * Configures the "ufo-sightings" MapReduce job and either submits it in the background or runs
 * it in the foreground, depending on {@code BACKGROUND}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the job was submitted in the background or finished successfully in the
 *         foreground; 1 when the foreground job failed or submitting/running it threw
 * @throws Exception if obtaining the configuration or creating the job fails
 */
public int run(final String[] args) throws Exception {
    final Job ufoJob = new Job(getConf(), "ufo-sightings");

    // Map/reduce implementations.
    ufoJob.setMapperClass(UfoSightingsMapper.class);
    ufoJob.setReducerClass(UfoSightingsReducer.class);

    // Output side: MongoDB collection keyed by Text with IntWritable values.
    ufoJob.setOutputFormatClass(com.mongodb.hadoop.MongoOutputFormat.class);
    ufoJob.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    ufoJob.setOutputValueClass(org.apache.hadoop.io.IntWritable.class);

    // Input side.
    ufoJob.setInputFormatClass(com.mongodb.hadoop.MongoInputFormat.class);

    final boolean verbose = true;
    try {
        if (!BACKGROUND) {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? "
                    + verbose + "}");
            final boolean succeeded = ufoJob.waitForCompletion(true);
            return succeeded ? 0 : 1;
        }

        LOG.info("Setting up and running MapReduce job in background.");
        ufoJob.submit();
        return 0;
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}
From source file:com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java
License:Apache License
@Override public int run(String[] args) throws Exception { final Configuration conf = getConf(); boolean useQuery = false; for (int i = 0; i < args.length; i++) { final String argi = args[i]; if (argi.equals("--use-query")) useQuery = true;/*from w ww. j a va 2 s . c o m*/ else { throw new IllegalArgumentException(argi); } } if (useQuery) { //NOTE: must do this BEFORE Job is created final MongoConfig mongo_conf = new MongoConfig(conf); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 }))); System.out.println(" --- setting query on num"); mongo_conf.setQuery(query); System.out.println(" --- query is: " + mongo_conf.getQuery()); } final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf); if (outputUri == null) throw new IllegalStateException("output uri is not set"); if (MongoConfigUtil.getInputURI(conf) == null) throw new IllegalStateException("input uri is not set"); final String outputCollectionName = outputUri.getCollection(); final Job job = new Job(conf, "word count " + outputCollectionName); job.setJarByClass(WordCountSplitTest.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); final long start = System.currentTimeMillis(); System.out .println(" ----------------------- running test " + outputCollectionName + " --------------------"); try { boolean result = job.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result); } catch (Exception e) { System.err.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); return 1; } final long end = System.currentTimeMillis(); final float seconds = ((float) (end - start)) / 
1000; java.text.NumberFormat nf = java.text.NumberFormat.getInstance(); nf.setMaximumFractionDigits(3); System.out.println("finished run in " + nf.format(seconds) + " seconds"); com.mongodb.Mongo m = new com.mongodb.Mongo(outputUri); com.mongodb.DB db = m.getDB(outputUri.getDatabase()); com.mongodb.DBCollection coll = db.getCollection(outputCollectionName); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("_id", "the"); com.mongodb.DBCursor cur = coll.find(query); if (!cur.hasNext()) System.out.println("FAILURE: could not find count of \'the\'"); else System.out.println("'the' count: " + cur.next()); return 0; //is the return value supposed to be the program exit code? // if (! result) // System.exit( 1 ); }
From source file:com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java
License:Apache License
private final static void test(boolean useShards, boolean useChunks, Boolean slaveok, boolean useQuery) throws Exception { final Configuration conf = new Configuration(); MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines"); conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, useShards); conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, useChunks); if (useQuery) { //NOTE: must do this BEFORE Job is created final MongoConfig mongo_conf = new MongoConfig(conf); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 }))); System.out.println(" --- setting query on num"); mongo_conf.setQuery(query);/*from w w w. j a v a 2 s. co m*/ System.out.println(" --- query is: " + mongo_conf.getQuery()); } String output_table = null; if (useChunks) { if (useShards) output_table = "with_shards_and_chunks"; else output_table = "with_chunks"; } else { if (useShards) output_table = "with_shards"; else output_table = "no_splits"; } if (slaveok != null) { output_table += "_" + slaveok; } MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." 
+ output_table); System.out.println("Conf: " + conf); final Job job = new Job(conf, "word count " + output_table); job.setJarByClass(WordCountSplitTest.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); final long start = System.currentTimeMillis(); System.out.println(" ----------------------- running test " + output_table + " --------------------"); try { boolean result = job.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } final long end = System.currentTimeMillis(); final float seconds = ((float) (end - start)) / 1000; java.text.NumberFormat nf = java.text.NumberFormat.getInstance(); nf.setMaximumFractionDigits(3); System.out.println("finished run in " + nf.format(seconds) + " seconds"); com.mongodb.Mongo m = new com.mongodb.Mongo( new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true")); com.mongodb.DB db = m.getDB("test"); com.mongodb.DBCollection coll = db.getCollection(output_table); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("_id", "the"); com.mongodb.DBCursor cur = coll.find(query); if (!cur.hasNext()) System.out.println("FAILURE: could not find count of \'the\'"); else System.out.println("'the' count: " + cur.next()); // if (! result) // System.exit( 1 ); }
From source file:com.mongodb.hadoop.examples.wordcount.WordCount.java
License:Apache License
/**
 * Runs the word-count job from {@code mongodb://localhost/test.in} to
 * {@code mongodb://localhost/test.out} and exits with status 0 on success or 1 on failure.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if creating or running the job fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    MongoConfigUtil.setInputURI(configuration, "mongodb://localhost/test.in");
    MongoConfigUtil.setOutputURI(configuration, "mongodb://localhost/test.out");
    System.out.println("Conf: " + configuration);

    final Job wordCountJob = new Job(configuration, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // Processing classes: the reducer doubles as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    // Output types.
    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    // MongoDB-backed input and output.
    wordCountJob.setInputFormatClass(MongoInputFormat.class);
    wordCountJob.setOutputFormatClass(MongoOutputFormat.class);

    final boolean succeeded = wordCountJob.waitForCompletion(true);
    final int exitCode;
    if (succeeded) {
        exitCode = 0;
    } else {
        exitCode = 1;
    }
    System.exit(exitCode);
}
From source file:com.mongodb.hadoop.util.MongoTool.java
License:Apache License
private int runMapReduceJob(final Configuration conf) throws IOException { final Job job = Job.getInstance(conf, getJobName()); /**// ww w. ja v a 2 s. c om * Any arguments specified with -D <property>=<value> * on the CLI will be picked up and set here * They override any XML level values * Note that -D<space> is important - no space will * not work as it gets picked up by Java itself */ // TODO - Do we need to set job name somehow more specifically? // This may or may not be correct/sane job.setJarByClass(getClass()); final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf); LOG.debug("Mapper Class: " + mapper); LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI)); job.setMapperClass(mapper); Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf); if (combiner != null) { job.setCombinerClass(combiner); } job.setReducerClass(MongoConfigUtil.getReducer(conf)); job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf)); job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf)); job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf)); job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf)); Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf); Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf); if (mapOutputKeyClass != null) { job.setMapOutputKeyClass(mapOutputKeyClass); } if (mapOutputValueClass != null) { job.setMapOutputValueClass(mapOutputValueClass); } /** * Determines if the job will run verbosely e.g. print debug output * Only works with foreground jobs */ final boolean verbose = MongoConfigUtil.isJobVerbose(conf); /** * Run job in foreground aka wait for completion or background? */ final boolean background = MongoConfigUtil.isJobBackground(conf); try { if (background) { LOG.info("Setting up and running MapReduce job in background."); job.submit(); return 0; } else { LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? 
" + verbose + "}"); return job.waitForCompletion(true) ? 0 : 1; } } catch (final Exception e) { LOG.error("Exception while executing job... ", e); return 1; } }
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/** * Configures the MapReduce reducer for the job. * * @param job The Hadoop MR job.// w w w . j a v a 2s . c o m * @throws IOException If there is an error. */ protected void configureReducer(Job job) throws IOException { final FijiReducer<?, ?, ?, ?> reducer = getReducer(); if (null == reducer) { LOG.info("No reducer provided. This will be a map-only job"); job.setNumReduceTasks(0); // Set the job output key/value classes based on what the map output key/value classes were // since this a map-only job. job.setOutputKeyClass(job.getMapOutputKeyClass()); Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration()); if (null != mapOutputKeySchema) { AvroJob.setOutputKeySchema(job, mapOutputKeySchema); } job.setOutputValueClass(job.getMapOutputValueClass()); Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration()); if (null != mapOutputValueSchema) { AvroJob.setOutputValueSchema(job, mapOutputValueSchema); } return; } if (reducer instanceof Configurable) { ((Configurable) reducer).setConf(job.getConfiguration()); } job.setReducerClass(reducer.getClass()); // Set output key class. Class<?> outputKeyClass = reducer.getOutputKeyClass(); job.setOutputKeyClass(outputKeyClass); Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer); if (AvroKey.class.isAssignableFrom(outputKeyClass)) { if (null == outputKeyWriterSchema) { throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. " + "Did you forget to implement AvroKeyWriter in your FijiReducer?"); } AvroJob.setOutputKeySchema(job, outputKeyWriterSchema); } else if (null != outputKeyWriterSchema) { throw new JobConfigurationException( reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema" + " but the output key class was not AvroKey."); } // Set output value class. 
Class<?> outputValueClass = reducer.getOutputValueClass(); job.setOutputValueClass(outputValueClass); Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer); if (AvroValue.class.isAssignableFrom(outputValueClass)) { if (null == outputValueWriterSchema) { throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. " + "Did you forget to implement AvroValueWriter in your FijiReducer?"); } AvroJob.setOutputValueSchema(job, outputValueWriterSchema); } else if (null != outputValueWriterSchema) { throw new JobConfigurationException( reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema" + " but the output value class was not AvroValue."); } }
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter) throws Exception { final Job job = new Job(createConfiguration()); final Configuration conf = job.getConfiguration(); // Get settings for test. final FijiDataRequest request = FijiDataRequest.builder() .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build(); job.setJarByClass(IntegrationTestFijiTableInputFormat.class); // Setup the InputFormat. FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter); job.setInputFormatClass(HBaseFijiTableInputFormat.class); // Duplicate functionality from MapReduceJobBuilder, since we are not using it here: final List<Path> jarFiles = Lists.newArrayList(); final FileSystem fs = FileSystem.getLocal(conf); for (String cpEntry : System.getProperty("java.class.path").split(":")) { if (cpEntry.endsWith(".jar")) { jarFiles.add(fs.makeQualified(new Path(cpEntry))); }/*w ww . java2 s. c om*/ } DistributedCacheJars.addJarsToDistributedCache(job, jarFiles); // Create a test job. job.setJobName(jobName); // Setup the OutputFormat. TextOutputFormat.setOutputPath(job, outputFile.getParent()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); // Set the mapper class. if (null != mapperClass) { job.setMapperClass(mapperClass); } // Set the reducer class. if (null != reducerClass) { job.setReducerClass(reducerClass); } return job; }