List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
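Before the collected examples, here is a minimal sketch of how setReducerClass is typically called when configuring a plain Hadoop MapReduce job. The class names WordMapper and SumReducer are placeholders for illustration only and do not come from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetReducerClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(SetReducerClassExample.class);
        job.setMapperClass(WordMapper.class);    // hypothetical Mapper implementation
        job.setCombinerClass(SumReducer.class);  // hypothetical Reducer implementation
        // setReducerClass must be called while the job is still being configured;
        // calling it after submission throws IllegalStateException.
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}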
From source file:co.cask.cdap.examples.fileset.WordCount.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Tokenizer.class);
    job.setReducerClass(Counter.class);
    job.setNumReduceTasks(1);
    String inputDataset = context.getRuntimeArguments().get("input");
    inputDataset = inputDataset != null ? inputDataset : "lines";
    String outputDataset = context.getRuntimeArguments().get("output");
    outputDataset = outputDataset != null ? outputDataset : "counts";
    context.addInput(Input.ofDataset(inputDataset));
    context.addOutput(Output.ofDataset(outputDataset));
}
From source file:co.cask.cdap.examples.loganalysis.HitCounterProgram.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Emitter.class);
    job.setReducerClass(Counter.class);
    context.addInput(Input.ofStream(LogAnalysisApp.LOG_STREAM));
    context.addOutput(Output.ofDataset(LogAnalysisApp.HIT_COUNT_STORE));
}
From source file:co.cask.cdap.examples.purchase.PurchaseHistoryBuilder.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setReducerClass(PerUserReducer.class);
    context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class);
    context.addOutput(Output.ofDataset("history"));

    // Override default memory usage if the corresponding runtime arguments are set.
    Map<String, String> runtimeArgs = context.getRuntimeArguments();
    String mapperMemoryMBStr = runtimeArgs.get(MAPPER_MEMORY_MB);
    if (mapperMemoryMBStr != null) {
        context.setMapperResources(new Resources(Integer.parseInt(mapperMemoryMBStr)));
    }
    String reducerMemoryMBStr = runtimeArgs.get(REDUCER_MEMORY_MB);
    if (reducerMemoryMBStr != null) {
        context.setReducerResources(new Resources(Integer.parseInt(reducerMemoryMBStr)));
    }
}
From source file:co.cask.cdap.examples.sportresults.ScoreCounter.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(ResultsMapper.class);
    job.setReducerClass(TeamCounter.class);
    job.setNumReduceTasks(1);

    String league = context.getRuntimeArguments().get("league");
    Preconditions.checkNotNull(league);

    // Configure the input to read all seasons for the league
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs,
        PartitionFilter.builder().addValueCondition("league", league).build());
    context.addInput(Input.ofDataset("results", inputArgs));

    // Each run writes its output to a partition for the league
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionKey outputKey = PartitionKey.builder().addStringField("league", league).build();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
    context.addOutput(Output.ofDataset("totals", outputArgs));

    // used only for logging:
    PartitionedFileSet input = context.getDataset("results", inputArgs);
    PartitionedFileSet outputFileSet = context.getDataset("totals", outputArgs);
    String outputPath = FileSetArguments
        .getOutputPath(outputFileSet.getEmbeddedFileSet().getRuntimeArguments());
    LOG.info("input: {}, output: {}", input.getEmbeddedFileSet().getInputLocations(), outputPath);
}
From source file:co.cask.cdap.examples.wikipedia.TopNMapReduce.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Map<String, String> runtimeArguments = context.getRuntimeArguments();
    Job job = context.getHadoopJob();
    WorkflowToken workflowToken = context.getWorkflowToken();

    int topNRank = 10;
    if (runtimeArguments.containsKey("topn.rank")) {
        topNRank = Integer.parseInt(runtimeArguments.get("topn.rank"));
    }
    if (workflowToken != null) {
        workflowToken.put("topn.rank", Value.of(topNRank));
    }

    int numReduceTasks = 1;
    if (runtimeArguments.containsKey("num.reduce.tasks")) {
        numReduceTasks = Integer.parseInt(runtimeArguments.get("num.reduce.tasks"));
    }
    job.setNumReduceTasks(numReduceTasks);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(TopNReducer.class);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT));
}
From source file:co.cask.cdap.internal.app.runtime.batch.AggregateMetricsByTag.java
License: Apache License
static void configureJob(Job job) throws IOException {
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(Reduce.class);
}
From source file:co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java
License: Apache License
/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getReducerClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
    if (reducerClass != null) {
        conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
        job.setReducerClass(ReducerWrapper.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.WordCount.java
License: Apache License
public static void configureJob(Job job, String inputPath, String outputPath) throws IOException {
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
From source file:co.nubetech.hiho.dedup.DedupJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);

    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
            .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:co.nubetech.hiho.job.ExportToFTPServer.java
License: Apache License
@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}