Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#setReducerClass from a variety of open-source projects.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the Reducer for the job. The call throws IllegalStateException if the job has already been submitted, so it must be made while the job is still being configured.
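
Before the project-specific examples below, here is a minimal, self-contained sketch of where setReducerClass fits in a typical word-count style driver. The class names SetReducerClassExample, TokenMapper and SumReducer are illustrative only and do not come from any of the projects listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetReducerClassExample {

    // Emits (token, 1) for every whitespace-separated token in the input.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    // Sums the counts for each token.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setReducerClass example");
        job.setJarByClass(SetReducerClassExample.class);

        job.setMapperClass(TokenMapper.class);
        // Must be called before the job is submitted; afterwards it throws IllegalStateException.
        job.setReducerClass(SumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}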

Usage

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the
 * DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of
 * regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's
 * requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to
    // properly sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}

From source file:com.ckelsel.hadoop.mapreduce.WordCount.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: EventCount <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "event count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

    // delete output if exists
    Path outPath = new Path(otherArgs[1]);
    outPath.getFileSystem(conf).delete(outPath, true);
    FileOutputFormat.setOutputPath(job, outPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList<Job>();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}

From source file:com.cloudera.avro.MapReduceAvroWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("wordcount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

    return 0;
}

From source file:com.cloudera.avro.MapReduceColorCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    AvroJob.setMapOutputValueSchema(job, User.getClassSchema());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.ByteCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();

    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);

    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);

    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }

    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }

    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);

    job.setInputFormatClass(ByteBufferInputFormat.class);

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);

    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));

    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);

    System.exit(success ? 0 : 1);
}

From source file:com.cloudera.castagna.logparser.mr.StatusCodesStats.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT,
            Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = Job.getInstance(configuration);
    job.setJobName(Constants.STATUS_CODES_STATS);
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(StatusCodesStatsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(StatusCodesStatsCombiner.class);

    job.setReducerClass(StatusCodesStatsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.crunch.impl.mr.plan.JobPrototype.java

License:Open Source License

private CrunchJob build(Class<?> jarClass, Configuration conf) throws IOException {
    Job job = new Job(conf);
    conf = job.getConfiguration();
    job.setJarByClass(jarClass);

    Set<DoNode> outputNodes = Sets.newHashSet();
    Set<Target> targets = targetsToNodePaths.keySet();
    MSCROutputHandler outputHandler = new MSCROutputHandler(job, workingPath, group == null);
    for (Target target : targets) {
        DoNode node = null;
        for (NodePath nodePath : targetsToNodePaths.get(target)) {
            if (node == null) {
                PCollectionImpl collect = nodePath.tail();
                node = DoNode.createOutputNode(target.toString(), collect.getPType());
                outputHandler.configureNode(node, target);
            }
            outputNodes.add(walkPath(nodePath.descendingIterator(), node));
        }
    }

    job.setMapperClass(CrunchMapper.class);
    List<DoNode> inputNodes;
    DoNode reduceNode = null;
    RTNodeSerializer serializer = new RTNodeSerializer();
    if (group != null) {
        job.setReducerClass(CrunchReducer.class);
        List<DoNode> reduceNodes = Lists.newArrayList(outputNodes);
        reduceNode = reduceNodes.get(0);
        serializer.serialize(reduceNodes, conf, NodeContext.REDUCE);

        group.configureShuffle(job);

        DoNode mapOutputNode = group.getGroupingNode();
        if (reduceNodes.size() == 1 && combineFnTable != null) {
            // Handle the combiner case
            DoNode mapSideCombineNode = combineFnTable.createDoNode();
            mapSideCombineNode.addChild(mapOutputNode);
            mapOutputNode = mapSideCombineNode;
        }

        Set<DoNode> mapNodes = Sets.newHashSet();
        for (NodePath nodePath : mapNodePaths) {
            // Advance these one step, since we've already configured
            // the grouping node, and the PGroupedTableImpl is the tail
            // of the NodePath.
            Iterator<PCollectionImpl> iter = nodePath.descendingIterator();
            iter.next();
            mapNodes.add(walkPath(iter, mapOutputNode));
        }
        inputNodes = Lists.newArrayList(mapNodes);
        serializer.serialize(inputNodes, conf, NodeContext.MAP);
    } else { // No grouping
        job.setNumReduceTasks(0);
        inputNodes = Lists.newArrayList(outputNodes);
        serializer.serialize(inputNodes, conf, NodeContext.MAP);
    }

    if (inputNodes.size() == 1) {
        DoNode inputNode = inputNodes.get(0);
        inputNode.getSource().configureSource(job, -1);
    } else {
        for (int i = 0; i < inputNodes.size(); i++) {
            DoNode inputNode = inputNodes.get(i);
            inputNode.getSource().configureSource(job, i);
        }
        job.setInputFormatClass(CrunchInputFormat.class);
    }
    job.setJobName(createJobName(inputNodes, reduceNode));

    return new CrunchJob(job, workingPath, outputHandler);
}

From source file:com.cloudera.hbase.WordCount.java

License:Open Source License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 2;
    }

    Configuration conf = getConf();

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.avro.mapreduce.ColorCount.java

License:Apache License

/**
 * Run the MR2 color count with generic records, and return a map of favorite colors to
 * the number of users.
 */
public static java.util.Map<String, Integer> countColors()
        throws IOException, ClassNotFoundException, InterruptedException {
    String output = TestUtil.getTempDirectory();
    Path outputPath = new Path(output);
    JobConf conf = new JobConf(ColorCount.class);
    conf.setInt("mapreduce.job.reduces", 1);

    Job job = Job.getInstance(conf);
    job.setJarByClass(ColorCount.class);
    job.setJobName("MR2 Color Count With Generic Records");

    RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(Reduce.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.waitForCompletion(false);

    // Read the result and return it. Since we set the number of reducers to 1,
    // there is always just one file containing the value.
    SeekableInput input = new FsInput(new Path(output + "/part-r-00000.avro"), conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
    java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>();
    for (GenericRecord datum : fileReader) {
        colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString()));
    }
    return colorMap;
}