Example usage for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass)

Source Link

Document

Set the Reducer class for the job.

Usage

From source file:com.aerospike.hadoop.examples.externaljoin.ExternalJoin.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, ExternalJoin.class);
    job.setJobName("AerospikeExternalJoin");

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class);  // Reduce changes format.
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Session.class);

    job.setOutputFormat(SessionOutputFormat.class);

    for (int ii = 0; ii < args.length; ++ii)
        FileInputFormat.addInputPath(job, new Path(args[ii]));

    JobClient.runJob(job);//w  w  w. j av a  2  s.  c om

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.generateprofiles.GenerateProfiles.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, GenerateProfiles.class);
    job.setJobName("AerospikeGenerateProfiles");

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    // job.setCombinerClass(Reduce.class);  // Reduce changes format.
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Profile.class);

    job.setOutputFormat(ProfileOutputFormat.class);

    for (int ii = 0; ii < args.length; ++ii)
        FileInputFormat.addInputPath(job, new Path(args[ii]));

    JobClient.runJob(job);/*www  . j a va  2  s  .  c  om*/

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.sessionrollup.SessionRollup.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, SessionRollup.class);
    job.setJobName("AerospikeSessionRollup");

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class);  // Reduce changes format.
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Session.class);

    job.setOutputFormat(SessionOutputFormat.class);

    for (int ii = 0; ii < args.length; ++ii)
        FileInputFormat.addInputPath(job, new Path(args[ii]));

    JobClient.runJob(job);// w w  w. j  a v a  2s  .  co m

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.wordcountinput.WordCountInput.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, WordCountInput.class);
    job.setJobName("AerospikeWordCountInput");

    job.setInputFormat(AerospikeInputFormat.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    JobClient.runJob(job);/*  ww w  .j a  v a2s  . co  m*/

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.wordcountoutput.WordCountOutput.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, WordCountOutput.class);
    job.setJobName("AerospikeWordCountOutput");

    for (int ii = 0; ii < args.length; ++ii) {
        FileInputFormat.addInputPath(job, new Path(args[ii]));
    }//from   w  w w . ja v  a 2  s.c om

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setOutputFormat(MyOutputFormat.class);

    JobClient.runJob(job);

    log.info("finished");
    return 0;
}

From source file:com.alexholmes.hadooputils.sort.Sort.java

License:Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 *///from  w  w  w .j  a v a  2  s.c  o  m
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}

From source file:com.benchmark.mapred.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }/*from w  w w . j  a va2 s  .c o m*/

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}

From source file:com.benchmark.mapred.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Grep <inDir> <outDir> <numreduces> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }//  w w  w.j  ava 2s. c  o m

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        Date startIteration = new Date();
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[3]);
        if (args.length == 5)
            grepJob.set("mapred.mapper.regex.group", args[4]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setNumReduceTasks(Integer.parseInt(args[2]));

        JobClient.runJob(grepJob);

        Date endIteration = new Date();
        System.out.println("The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000
                + " seconds.");
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}

From source file:com.benchmark.mapred.Join.java

License:Apache License

/**
 * The main driver for sort program.//from   w  ww  . j a v  a2  s  . c  o m
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the 
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:com.benchmark.mapred.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();/*from w  ww  .  ja  v a2 s  .  c o m*/
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    //set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(LongWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}