List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.antbrains.crf.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// w w w.ja va 2 s . c o m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.app.hadoopexample.MaxTemperatureDriver.java
/**
 * Configures and runs the max-temperature job, returning its exit status.
 *
 * NOTE(review): the incoming {@code arg} parameter is ignored and replaced
 * by hard-coded Windows paths — presumably for local debugging; confirm
 * before shipping. The {@code args.length != 2} guard can therefore never
 * fire as written.
 *
 * Fix: the original called {@code job.waitForCompletion(true)} a second
 * time after {@code System.exit(...)}, so the trailing
 * {@code return success ? 0 : 1} was unreachable dead code and the job
 * status never reached the caller. The job now runs exactly once and its
 * status is returned (e.g. to {@code ToolRunner}), as {@code Tool.run}
 * expects.
 *
 * @param arg command-line arguments (currently unused, see note above)
 * @return 0 if the job succeeded, 1 otherwise
 */
public int run(String[] arg) throws Exception {
    String[] args = { "C:/Hadoop/input/LICENSE.txt", "C:/Hadoop/output/LICENSE.txt" };
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperatureDriver <input path> <outputpath>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureDriver.class);
    job.setJobName("Max Temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Run once; report the status to the caller instead of exiting the JVM.
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.architecting.ch07.MapReduceIndexerTool.java
License:Apache License
/** API for Java clients;visible for testing;may become a public API eventually */
int run(Options options) throws Exception {
    // Fail fast: the distributed cache (--files/--libjars) is unavailable
    // under MR1's LocalJobRunner, and this tool depends on it.
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);
    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);
    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }
    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());
    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    // NOTE(review): hard-coded developer-machine jar path — confirm before
    // running anywhere other than the original author's VM.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));
    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    // Recreate the output directory from scratch; abort on delete failure.
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");
    int reducers = 1;
    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>
    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);
    FileOutputFormat.setOutputPath(job, outputReduceDir);
    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);
    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }
    // -------------------------------------------------------------------------------------------------------------------------------------
    // NOTE(review): reducers is hard-coded to 1 above, so this assert only
    // holds when options.shards == 1 — verify the intended invariant.
    assert reducers == options.shards;
    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }
    ;
    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }
    // Optionally merge the freshly built shards into a live SolrCloud.
    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }
    goodbye(job, programStartTime);
    return 0;
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
/** * Test for map-reduce job./* ww w . j av a 2 s . c om*/ * @throws Exception if failed */ @Test public void map_reduce() throws Exception { Job job = newJob(); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(WordCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setSortComparatorClass(Text.Comparator.class); job.setGroupingComparatorClass(Text.Comparator.class); job.setReducerClass(WordCountReducer.class); job.setNumReduceTasks(1); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); File inputDir = folder.newFolder(); File inputFile = new File(inputDir, "input.txt"); write(inputFile, new String[] { "a b c d", "a a b c", "c", }); File outputDir = folder.newFolder(); outputDir.delete(); FileInputFormat.setInputPaths(job, new Path(inputFile.toURI())); FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI())); assertThat(new SimpleJobRunner().run(job), is(true)); assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", }))); }
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
@SuppressWarnings("rawtypes") private void configureShuffle(Job job, VariableTable variables) { Class<? extends Reducer> reducer = getReducerClassOrNull(); if (reducer != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Reducer: {0}", reducer.getName())); //$NON-NLS-1$ }/*w w w. j a v a2 s . com*/ job.setReducerClass(reducer); } else { if (LOG.isDebugEnabled()) { LOG.debug("Reducer: N/A"); //$NON-NLS-1$ } job.setNumReduceTasks(0); return; } Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class); Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$ outputKeyClass.getName(), outputValueClass.getName())); } job.setMapOutputKeyClass(outputKeyClass); job.setMapOutputValueClass(outputValueClass); Class<? extends Reducer> combiner = getCombinerClassOrNull(); if (combiner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Combiner: {0}", combiner.getName())); //$NON-NLS-1$ } job.setCombinerClass(combiner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Combiner: N/A"); //$NON-NLS-1$ } } Class<? extends Partitioner> partitioner = getPartitionerClassOrNull(); if (partitioner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Partitioner: {0}", partitioner.getName())); //$NON-NLS-1$ } job.setPartitionerClass(partitioner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$ } } Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull(); if (groupingComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName())); //$NON-NLS-1$ } job.setGroupingComparatorClass(groupingComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$ } } Class<? 
extends RawComparator> sortComparator = getSortComparatorClassOrNull(); if (sortComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("SortComparator: {0}", sortComparator.getName())); //$NON-NLS-1$ } job.setSortComparatorClass(sortComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$ } } }
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void updateMerge() throws IOException, InterruptedException { Job job = newJob(); List<StageInput> inputList = new ArrayList<>(); inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class, MergeJoinBaseMapper.class)); inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class, MergeJoinPatchMapper.class)); StageInputDriver.set(job, inputList); job.setInputFormatClass(StageInputFormat.class); job.setMapperClass(StageInputMapper.class); job.setMapOutputKeyClass(PatchApplyKey.class); job.setMapOutputValueClass(modelClass); // combiner may have no effect in normal cases job.setReducerClass(MergeJoinReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(modelClass); job.setPartitionerClass(PatchApplyKey.Partitioner.class); job.setSortComparatorClass(PatchApplyKey.SortComparator.class); job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class); TemporaryOutputFormat.setOutputPath(job, getNextDirectory()); job.setOutputFormatClass(TemporaryOutputFormat.class); job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); try {/*from w w w .j av a 2 s . c o m*/ boolean succeed = job.waitForCompletion(true); LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed, storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); if (succeed == false) { throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); } } catch (ClassNotFoundException e) { throw new IOException(e); } putMeta(); }
From source file:com.asp.tranlog.ImportTsv.java
License:Apache License
/** * Sets up the actual job.// w w w. j ava 2s . c om * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes()))); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); String inputCodec = conf.get(INPUT_LZO_KEY); if (inputCodec == null) { FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split // size = // 64m job.setInputFormatClass(TextInputFormat.class); } else { if (inputCodec.equalsIgnoreCase("lzo")) job.setInputFormatClass(LzoTextInputFormat.class); else { usage("not supported compression codec!"); System.exit(-1); } } job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call // initTableReducerJob // to set up the TableOutputFormat. 
TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* * Guava used by TsvParser */); return job; }
From source file:com.awcoleman.BouncyCastleGenericCDRHadoop.BasicDriverMapReduce.java
License:Apache License
/**
 * Configures and submits the CDR-decoding job over the raw binary input.
 *
 * Fix: replaces the deprecated {@code new Job(Configuration)} constructor
 * with {@link Job#getInstance(Configuration)}.
 *
 * @param args args[0] = input path, args[1] = output path
 * @return 0 if the job succeeded, 1 otherwise
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.out.println("Missing input and output filenames. Exiting.");
        // NOTE(review): exiting the JVM inside Tool.run() prevents the
        // caller from seeing a status code; kept to preserve behavior.
        System.exit(1);
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(BasicDriverMapReduce.class);
    job.setJobName("BasicDriver1");
    job.setMapperClass(BasicMapper.class);
    job.setReducerClass(BasicReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RawFileAsBinaryInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.BasicDriverMapReduce.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { System.out.println("Missing input and output filenames. Exiting."); System.exit(1);//from ww w .j a v a2 s .c o m } @SuppressWarnings("deprecation") Job job = new Job(super.getConf()); job.setJarByClass(BasicDriverMapReduce.class); job.setJobName("BasicDriverMapReduce"); job.setMapperClass(BasicMapper.class); job.setReducerClass(BasicReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(CallDetailRecord.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setInputFormatClass(RawFileAsBinaryInputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java
License:Apache License
/**
 * Converts binary records to Avro records: configures schemas and
 * compression, runs the job, prints the custom counters, computes a
 * derived KPI, and copies the driver's temp log file into the HDFS
 * output path.
 *
 * @param args args[0] = input path, args[1] = output path
 * @return 0 if the job succeeded, 1 otherwise; -1 on bad usage
 */
public int run(String[] args) throws Exception {
    String input = null;
    String output = null;
    if (args.length < 2) {
        System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName());
        return -1;
    } else {
        input = args[0];
        output = args[1];
    }
    Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver");
    Configuration conf = job.getConfiguration();
    //Add job log to hold Driver logging (and any summary info about the dataset,job, or counters we want to write)
    String fapath = createTempFileAppender(job);
    //get schema
    Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class);
    job.getConfiguration().set("outSchema", outSchema.toString());
    //Job conf settings
    job.setJarByClass(BinRecToAvroRecDriver.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(BinRecInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, outSchema);
    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setMapOutputValueSchema(job, outSchema);
    //Job output compression
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);
    //Input and Output Paths
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    // Remove any stale output so the job can re-run.
    outPath.getFileSystem(conf).delete(outPath, true);
    boolean jobCompletionStatus = job.waitForCompletion(true);
    //Print Custom Counters before exiting
    Counters counters = job.getCounters();
    for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) {
        Counter thisCounter = counters.findCounter(customCounter);
        System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue());
    }
    // NOTE(review): the counter group below names
    // com.awcoleman.TestingGettingContainerLogger while this class lives in
    // the ExampleJobSummaryLogWithOutput package — looks like a copy-paste
    // remnant; confirm the counter group is correct.
    long mycnt1 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT1")
            .getValue();
    long mycnt2 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT2")
            .getValue();
    // mycnt3 is fetched but not used in the KPI below.
    long mycnt3 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT3")
            .getValue();
    long myfakekpi = mycnt1 - mycnt2;
    String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi);
    System.out.println(msgMyfakekpi);
    logger.info(msgMyfakekpi);
    //Finished, so move job log to HDFS in _log dir, clean
    copyTempFileAppenderToHDFSOutpath(job, fapath, output);
    return jobCompletionStatus ? 0 : 1;
}