Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job.setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the Reducer for the job.
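
Before the individual examples, here is a minimal, self-contained driver sketch modelled on the standard Hadoop word-count pattern, showing where setReducerClass fits in a typical job setup. The WordCount class and its nested mapper and reducer are illustrative placeholders, not taken from the examples below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    // Emits (word, 1) for every token in the input line.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Sums the counts for each word.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Set the Reducer for the job; must be called while the job is still
        // in the DEFINE state, otherwise an IllegalStateException is thrown.
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that setReducerClass, like the other job.set* methods, configures the job before submission; calling it after the job has been submitted raises the IllegalStateException named in the prototype above.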

Usage

From source file:com.mycompany.searcher.Searcher.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Please input a keyword:\t");
    conf.set(KEYWORD, in.readLine());
    conf.set(MINIMUM, args[2]);

    Job job = Job.getInstance(conf, "keyword search");

    job.setJarByClass(Searcher.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    clearOutput(conf, new Path(args[1]));
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
    TimeUnit.SECONDS.sleep(1);

    in = new BufferedReader(
            new InputStreamReader(FileSystem.get(conf).open(new Path(args[1] + "/part-r-00000")), "UTF-8"));
    String line;
    HashMap<String, Integer> map = new HashMap<String, Integer>();
    while ((line = in.readLine()) != null) {
        StringTokenizer tok = new StringTokenizer(line);
        map.put(tok.nextToken(), Integer.parseInt(tok.nextToken()));
    }

    List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
    Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
        public int compare(Map.Entry<String, Integer> entry1, Map.Entry<String, Integer> entry2) {
            return entry2.getValue().compareTo(entry1.getValue());
        }
    });
    for (Map.Entry<String, Integer> entry : list) {
        in = new BufferedReader(
                new InputStreamReader(FileSystem.get(conf).open(new Path(entry.getKey())), "UTF-8"));
        System.out.println("\n" + in.readLine());
        System.out.println("\n" + in.readLine() + ":" + entry.getValue() + "\n");
    }
}

From source file:com.nearinfinity.blur.mapreduce.BlurTask.java

License:Apache License

public Job configureJob(Configuration configuration) throws IOException {
    if (getIndexingType() == INDEXING_TYPE.UPDATE) {
        checkTable();
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    DataOutputStream output = new DataOutputStream(os);
    write(output);
    output.close();
    String blurTask = new String(Base64.encodeBase64(os.toByteArray()));
    configuration.set(BLUR_BLURTASK, blurTask);

    Job job = new Job(configuration, "Blur Indexer");
    job.setReducerClass(BlurReducer.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BlurMutate.class);
    job.setNumReduceTasks(getNumReducers(configuration));
    return job;
}

From source file:com.netflix.Aegisthus.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());

    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));

    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.neu.cs6240.TopKExperts.JoinQA.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: JoinQA <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "JoinQA");
    job.setJarByClass(JoinQA.class);
    job.setMapperClass(JoinQAMapper.class);
    job.setReducerClass(JoinQAReducer.class);
    job.setOutputKeyClass(JoinQAKey.class);
    job.setOutputValueClass(JoinQAValue.class);
    job.setPartitionerClass(JoinQAPartitioner.class);
    job.setGroupingComparatorClass(JoinQAGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean isSucess = false;

    isSucess = job.waitForCompletion(true);

    if (isSucess) {
        // On successful completion of JoinQA start UserAnswerCountPerHashTag
        System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag...");
        String[] argsForMR2 = new String[2];
        argsForMR2[0] = otherArgs[1];
        argsForMR2[1] = otherArgs[1] + "MR2";
        isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2);

        if (isSucess) {
            // On successful completion of UserAnswerCountPerHashTag start TopKPerHashTag
            System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag...");
            String[] argsForMR3 = new String[2];
            argsForMR3[0] = argsForMR2[1];
            argsForMR3[1] = argsForMR2[1] + "MR3";
            isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3);
            if (isSucess) {
                // Successfully completed TopKPerHashTag MR
                System.out.println("All MR - Successful.");
            } else {
                // Failed TopKPerHashTag MR
                System.out.println("MR - TopKPerHashTag failed.");
            }
        } else {
            // On unsuccessful completion of UserAnswerCountPerHashTag end MR
            System.out.println("MR - UserAnswerCountPerHashTag failed.");
        }

    } else {
        // On unsuccessful completion of JoinQA end MR
        System.out.println("MR - JoinQA failed.");
    }

    System.exit(isSucess ? 0 : 1);
}

From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java

License:Apache License

public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TopKPerHashTag");
    job.setJarByClass(TopKPerHashTag.class);
    job.setMapperClass(TopKPerHashTagMapper.class);
    job.setReducerClass(TopKPerHashTagReducer.class);
    job.setOutputKeyClass(TopKPerHashTagKey.class);
    job.setOutputValueClass(Text.class);
    job.setPartitionerClass(TopKPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    return job.waitForCompletion(true);
}

From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java

License:Apache License

public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "UserAnswerCountPerHashTag");
    job.setJarByClass(UserAnswerCountPerHashTag.class);
    job.setMapperClass(UserAnswerCountPerHashTagMapper.class);
    job.setReducerClass(UserAnswerCountPerHashTagReducer.class);
    job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    return job.waitForCompletion(true);
}

From source file:com.neu.cs6240.Xml2csvComments.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvComments <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Comments.xml to .csv");
    job.setJarByClass(Xml2csvComments.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(CommentsMapper.class);
    job.setReducerClass(CommentsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(10);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.neu.cs6240.Xml2csvPosts.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setReducerClass(PostsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(15);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java

License:Apache License

static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use the table's region boundaries as TotalOrderPartitioner (TOP) split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}