Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

On this page you can find example usages of the setReducerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

/**
 * Configures a MapReduce job to produce output for an incremental load into {@code table}.
 *
 * <p>Sets the job's output key, value and format classes, picks a sorting reducer matching the
 * configured map output value class, appends the extra serializations to
 * {@code io.serializations}, sizes the reduce phase to one task per region start key, and applies
 * the partitioner, compression, bloom-type and block-size settings derived from the table.
 *
 * <p>NOTE(review): {@code cls} is accepted but never read by this method; the output format is
 * always {@code HFileOutputFormatBase}. Confirm whether that is intentional.
 *
 * @param job   the job to configure
 * @param table the table whose region start keys and settings drive the configuration
 * @param cls   output format class (currently unused, see note above)
 * @throws IOException propagated from the table and job configuration calls made here
 */
public static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormatBase.class);

    // Looked up only after the output classes above have been set.
    final Class<?> mapValueClass = job.getMapOutputValueClass();

    // Choose the reducer that sorts the incoming values for the configured map output type.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(mapValueClass)) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(mapValueClass)) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(mapValueClass)) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        // No reducer is set for unrecognised types; only a warning is logged.
        LOG.warn("Unknown map output value type:" + mapValueClass);
    }

    // Keep whatever serializations are already configured and append the HBase ones.
    final Configuration conf = job.getConfiguration();
    final String existingSerializations = conf.get("io.serializations");
    conf.setStrings("io.serializations", existingSerializations, MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use the table's region boundaries for the total-order-partitioner split points.
    final String tableName = Bytes.toString(table.getTableName());
    LOG.info("Looking up current regions for table " + tableName);
    final List<ImmutableBytesWritable> regionStartKeys = getRegionStartKeys(table);
    final int reducerCount = regionStartKeys.size();
    LOG.info("Configuring " + reducerCount + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(reducerCount);
    configurePartitioner(job, regionStartKeys);

    // Per-column-family settings taken from the table.
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job) is deliberately left disabled here.
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + tableName + " output configured.");
}

From source file:com.neusoft.hbase.test.hadoop.WordCount.java

License:Apache License

/**
 * Entry point for the word-count job.
 *
 * <p>Expects exactly two arguments after the generic Hadoop options have been stripped: the input
 * path and the output path. Prints a usage message and exits with status 2 otherwise. Exits with
 * status 0 when the job completes successfully and 1 when it does not.
 *
 * @param args command-line arguments (generic Hadoop options followed by {@code <in> <out>})
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The same reducer class is used as both combiner and reducer.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.nistfortunetellers.cleaning.NISTClean.java

License:Apache License

/**
 * Runs a job whose output keys and values are {@code Text} and whose input and output formats
 * are {@code TextInputFormat} and {@code TextOutputFormat}.
 *
 * <p>The call blocks until the job finishes. Failures are reported on standard error rather than
 * thrown, so this method never propagates a checked exception to the caller.
 *
 * @param jobName    name given to the job
 * @param jobConfig  configuration the job is created from
 * @param inputPath  path added as the job's input
 * @param outputPath path set as the job's output
 * @param mapper     mapper class for the job
 * @param reducer    reducer class for the job
 */
@SuppressWarnings("rawtypes")
static void runTextJob(String jobName, Configuration jobConfig, String inputPath, String outputPath,
        Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) {
    try {
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job genericJob = Job.getInstance(jobConfig, jobName);
        genericJob.setJarByClass(NISTClean.class);
        genericJob.setOutputKeyClass(Text.class);
        genericJob.setOutputValueClass(Text.class);
        genericJob.setMapperClass(mapper);
        genericJob.setReducerClass(reducer);
        genericJob.setInputFormatClass(TextInputFormat.class);
        genericJob.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(genericJob, new Path(inputPath));
        FileOutputFormat.setOutputPath(genericJob, new Path(outputPath));
        // Surface an unsuccessful run instead of silently discarding the result.
        if (!genericJob.waitForCompletion(true)) {
            System.err.println("Job '" + jobName + "' did not complete successfully");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can still observe it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}

From source file:com.niuwa.hadoop.chubao.job.IndicatorJob002.java

License:Apache License

/**
 * Applies the settings specific to this job: mapper, combiner and reducer classes, output
 * key/value types, and the input and output paths.
 *
 * @param job       the job being configured
 * @param conf      job configuration (not read by this method)
 * @param params    run parameters (not read by this method)
 * @param tempPaths temporary paths keyed by job class name; the entry for this class is the output
 * @throws Exception propagated from the path and file-system helpers called here
 */
@Override
public void setJobSpecialInfo(Job job, Configuration conf, RunParams params, Map<String, Path> tempPaths)
        throws Exception {

    // Processing classes and output types.
    job.setMapperClass(IndicatorJob002.UserIdMapper.class);
    job.setCombinerClass(IndicatorJob002.CombinerSumReducer.class);
    job.setReducerClass(IndicatorJob002.IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input: the contact data set.
    final Path contactInput = ChubaoJobConfig.getInputPath(ChubaoJobConfig.INPUT_CONTACT);
    FileInputFormat.addInputPath(job, contactInput);

    // Output: the temporary path registered under this job's class name.
    final Path jobOutput = tempPaths.get(IndicatorJob002.class.getName());
    FileOutputFormat.setOutputPath(job, jobOutput);

    // Presumably clears output left over from a previous run; confirm against HadoopUtil.
    HadoopUtil.deleteOutputFile(jobOutput);

}

From source file:com.niuwa.hadoop.jobs.sample.JobControlTest.java

License:Apache License

/**
 * Runs two chained jobs through a {@code JobControl}: the second job reads the first job's output
 * and is registered as depending on it.
 *
 * <p>The base path defaults to {@code hdfs://ns1:9000/user/root} and can be overridden by the first
 * command-line argument. Once every job has finished, the list of successful jobs is printed and
 * the controller is stopped.
 *
 * @param args optional; {@code args[0]} overrides the base path
 * @throws Exception propagated from job setup, or if the waiting thread is interrupted
 */
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    Configuration conf = new Configuration();
    String path = "hdfs://ns1:9000/user/root";
    if (args.length != 0) {
        path = args[0];
    }

    // Build the date-stamped temp root once so both temp directories share the same parent.
    String tempRoot = path + "/chubao/temp/" + DateUtil.format(new Date());
    String[] args_1 = new String[] { path + "/chubao/input/contact",
            tempRoot + "/contact_total",
            tempRoot + "/contact_total_next" };
    String[] otherArgs = new GenericOptionsParser(conf, args_1).getRemainingArgs();

    // First job: reads the contact input and writes to the first temp directory.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(JobControlTest.class);
    job.setMapperClass(UserIdMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    // Presumably removes a stale output directory before the job runs; confirm against the helper.
    deleteOutputFile(otherArgs[1], otherArgs[0]);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: consumes the first job's output.
    Job job2 = Job.getInstance(conf, "job2");
    job2.setJarByClass(JobControlTest.class);
    job2.setMapperClass(AddDateMapper.class);
    job2.setReducerClass(Job2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    deleteOutputFile(otherArgs[2], otherArgs[1]);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

    // Wrap each job's configuration so it can be scheduled by JobControl.
    ControlledJob controlledJob1 = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());

    // The second job is registered as depending on the first.
    controlledJob2.addDependingJob(controlledJob1);

    JobControl jobControl = new JobControl("JobControlDemoGroup");
    jobControl.addJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    // JobControl is run on its own thread; this thread only polls for completion.
    Thread jobControlThread = new Thread(jobControl);
    jobControlThread.start();

    // Sleep between polls instead of spinning, so the wait does not burn a CPU core.
    final long pollIntervalMillis = 500L;
    while (!jobControl.allFinished()) {
        Thread.sleep(pollIntervalMillis);
    }
    System.out.println(jobControl.getSuccessfulJobList());
    jobControl.stop();
}

From source file:com.niuwa.hadoop.jobs.sample.WordCount.java

License:Apache License

/**
 * Runs the sample word-count job against fixed HDFS locations.
 *
 * <p>The incoming command-line arguments are ignored: the input directory and a time-stamped
 * output directory are hard-coded below. Exits with status 0 when the job succeeds and 1 when it
 * does not.
 *
 * @param args ignored; replaced by the hard-coded paths
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    HadoopUtil.isWinOrLiux();
    final Configuration conf = new Configuration();

    // Fixed locations; the current time in milliseconds is appended to the output directory name.
    final String inputDir = "hdfs://192.168.101.219:9000/user/root/input";
    final String outputDir = "hdfs://192.168.101.219:9000/user/root/output/count" + System.currentTimeMillis();

    final String[] remaining = new GenericOptionsParser(conf, new String[] { inputDir, outputDir })
            .getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    final Job wordCountJob = Job.getInstance(conf, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // The same reducer class is used as both combiner and reducer.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(wordCountJob, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(wordCountJob, new Path(remaining[1]));

    final boolean succeeded = wordCountJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.obomprogramador.bigdata.sentiment.sentimentanalysis.Sentiment.java

License:Apache License

/**
 * Entry point for the sentiment job.
 *
 * <p>Requires at least three arguments after the generic Hadoop options: the SentiWordNet file,
 * the input path and the output path. The first is stored in the configuration under
 * {@code sentwordnetfile} before the job is created. Exits with status 2 on bad usage, 0 when the
 * job succeeds and 1 when it does not.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length < 3) {
        System.err.println("Usage: Sentiment <sentiwordnet file> <in> <out>");
        System.exit(2);
    }

    final String wordNetFile = remaining[0];
    final String inputLocation = remaining[1];
    final String outputLocation = remaining[2];
    System.out.println("Param1: " + wordNetFile + " Param2: " + inputLocation + " Param3: " + outputLocation);

    // Set on the configuration before the job is created from it.
    conf.set("sentwordnetfile", wordNetFile);

    final Job sentimentJob = Job.getInstance(conf);
    sentimentJob.setJarByClass(Sentiment.class);

    // The same reducer class is used as both combiner and reducer.
    sentimentJob.setMapperClass(TokenizerMapper.class);
    sentimentJob.setCombinerClass(IntSumReducer.class);
    sentimentJob.setReducerClass(IntSumReducer.class);

    sentimentJob.setOutputKeyClass(Text.class);
    sentimentJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(sentimentJob, new Path(inputLocation));
    FileOutputFormat.setOutputPath(sentimentJob, new Path(outputLocation));

    final boolean succeeded = sentimentJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Graph Parsing/*w w w. java2 s  .  co m*/
 * Memasukan data mentah dan melakukan inisialisasi pagerank
 * 
 * @param in file data masukan
 * @param out direktori output
 */
public int parseGraph(String in, String out) throws IOException, InterruptedException, ClassNotFoundException {

    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#1 Parsing Graph");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(GraphParsingMapper.class);
    job.setReducerClass(GraphParsingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Runs one page-rank calculation job.
 *
 * <p>Any existing output directory is deleted recursively before the job is run.
 *
 * @param in        input path
 * @param out       output directory
 * @param iteration iteration number; used here only in the job name
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException            propagated from job setup, file-system access or execution
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from job execution
 */
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // FileSystem.newInstance hands back an instance owned by this caller, so it must be closed
    // here; otherwise one file-system handle is leaked per iteration.
    FileSystem fs = FileSystem.newInstance(getConf());
    try {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    } finally {
        fs.close();
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

/**
 * Runs the page-rank sorting job.
 *
 * <p>The map output is keyed by {@code DoubleWritable} with {@code Text} values, a single reduce
 * task is used, and {@code DoubleSortDescComparator} is installed as the sort comparator. Any
 * existing output directory is deleted recursively before the job is run.
 *
 * @param in  input path
 * @param out output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IOException            propagated from job setup, file-system access or execution
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from job execution
 */
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    // A single reduce task, so all sorted records pass through one reducer.
    job.setNumReduceTasks(1);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    // FileSystem.newInstance hands back an instance owned by this caller, so it must be closed
    // here; otherwise one file-system handle is leaked per call.
    FileSystem fs = FileSystem.newInstance(getConf());
    try {
        if (fs.exists(outputFilePath)) {
            fs.delete(outputFilePath, true);
        }
    } finally {
        fs.close();
    }

    return job.waitForCompletion(true) ? 0 : 1;
}