List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
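Before the collected examples, here is a minimal sketch of how setReducerClass is typically called when configuring a plain Hadoop MapReduce job. The class names WordMapper and SumReducer are placeholders for illustration only and do not come from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetReducerClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(SetReducerClassExample.class);
        job.setMapperClass(WordMapper.class);    // hypothetical Mapper implementation
        job.setCombinerClass(SumReducer.class);  // hypothetical Reducer implementation
        // setReducerClass must be called while the job is still being configured;
        // calling it after submission throws IllegalStateException.
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}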
From source file:co.cask.cdap.examples.fileset.WordCount.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Tokenizer.class);
    job.setReducerClass(Counter.class);
    job.setNumReduceTasks(1);
    String inputDataset = context.getRuntimeArguments().get("input");
    inputDataset = inputDataset != null ? inputDataset : "lines";
    String outputDataset = context.getRuntimeArguments().get("output");
    outputDataset = outputDataset != null ? outputDataset : "counts";
    context.addInput(Input.ofDataset(inputDataset));
    context.addOutput(Output.ofDataset(outputDataset));
}
From source file:co.cask.cdap.examples.loganalysis.HitCounterProgram.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Emitter.class);
    job.setReducerClass(Counter.class);
    context.addInput(Input.ofStream(LogAnalysisApp.LOG_STREAM));
    context.addOutput(Output.ofDataset(LogAnalysisApp.HIT_COUNT_STORE));
}
From source file:co.cask.cdap.examples.purchase.PurchaseHistoryBuilder.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setReducerClass(PerUserReducer.class);
    context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class);
    context.addOutput(Output.ofDataset("history"));

    // Override default memory usage if the corresponding runtime arguments are set.
    Map<String, String> runtimeArgs = context.getRuntimeArguments();
    String mapperMemoryMBStr = runtimeArgs.get(MAPPER_MEMORY_MB);
    if (mapperMemoryMBStr != null) {
        context.setMapperResources(new Resources(Integer.parseInt(mapperMemoryMBStr)));
    }
    String reducerMemoryMBStr = runtimeArgs.get(REDUCER_MEMORY_MB);
    if (reducerMemoryMBStr != null) {
        context.setReducerResources(new Resources(Integer.parseInt(reducerMemoryMBStr)));
    }
}
From source file:co.cask.cdap.examples.sportresults.ScoreCounter.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(ResultsMapper.class);
    job.setReducerClass(TeamCounter.class);
    job.setNumReduceTasks(1);

    String league = context.getRuntimeArguments().get("league");
    Preconditions.checkNotNull(league);

    // Configure the input to read all seasons for the league
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs,
        PartitionFilter.builder().addValueCondition("league", league).build());
    context.addInput(Input.ofDataset("results", inputArgs));

    // Each run writes its output to a partition for the league
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionKey outputKey = PartitionKey.builder().addStringField("league", league).build();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
    context.addOutput(Output.ofDataset("totals", outputArgs));

    // used only for logging:
    PartitionedFileSet input = context.getDataset("results", inputArgs);
    PartitionedFileSet outputFileSet = context.getDataset("totals", outputArgs);
    String outputPath = FileSetArguments
        .getOutputPath(outputFileSet.getEmbeddedFileSet().getRuntimeArguments());
    LOG.info("input: {}, output: {}", input.getEmbeddedFileSet().getInputLocations(), outputPath);
}
From source file:co.cask.cdap.examples.wikipedia.TopNMapReduce.java
License: Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Map<String, String> runtimeArguments = context.getRuntimeArguments();
    Job job = context.getHadoopJob();
    WorkflowToken workflowToken = context.getWorkflowToken();

    int topNRank = 10;
    if (runtimeArguments.containsKey("topn.rank")) {
        topNRank = Integer.parseInt(runtimeArguments.get("topn.rank"));
    }
    if (workflowToken != null) {
        workflowToken.put("topn.rank", Value.of(topNRank));
    }

    int numReduceTasks = 1;
    if (runtimeArguments.containsKey("num.reduce.tasks")) {
        numReduceTasks = Integer.parseInt(runtimeArguments.get("num.reduce.tasks"));
    }
    job.setNumReduceTasks(numReduceTasks);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(TopNReducer.class);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT));
}
From source file:co.cask.cdap.internal.app.runtime.batch.AggregateMetricsByTag.java
License: Apache License
static void configureJob(Job job) throws IOException {
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(Reduce.class);
}
From source file:co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java
License: Apache License
/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getReducerClass() as we don't need to load the user class here
    Configuration conf = job.getConfiguration();
    String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
    if (reducerClass != null) {
        conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
        job.setReducerClass(ReducerWrapper.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.WordCount.java
License: Apache License
public static void configureJob(Job job, String inputPath, String outputPath) throws IOException {
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
From source file:co.nubetech.hiho.dedup.DedupJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);

    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
            .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:co.nubetech.hiho.job.ExportToFTPServer.java
License: Apache License
@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}