Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the combiner class for the job.

Usage

From source file:GetUserInfoGivenMovieId.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("movieId", args[2]);
    Job job = new Job(conf, "GetUserInfoGivenMovieId");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setJarByClass(GetUserInfoGivenMovieId.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[3]));

    boolean flag1 = job.waitForCompletion(true);

    if (flag1) {//  ww  w.  jav a 2s .co  m
        Configuration conf2 = new Configuration();
        //FileSystem fs = FileSystem.get(conf2);
        //Path Intermediate = new Path(args[1]);
        //DistributedCache.addCacheFile(Intermediate.toUri(), conf2);
        //DistributedCache.addCacheFile(new URI(args[1]),conf2);

        Job job2 = new Job(conf2, "UserInfo");
        /*Job job2 = new Job(new Configuration());
        Configuration conf2 = job.getConfiguration();
        job2.setJobName("Join with Cache");
        DistributedCache.addCacheFile(new URI(args[1]), conf2);*/
        job2.addCacheFile(new URI(args[1]));
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(GetUserInfoGivenMovieId.class);
        job2.setMapperClass(MapWithJoin.class);
        //job2.setCombinerClass(Reduce.class);
        job2.setReducerClass(ReduceFinal.class);

        job2.setInputFormatClass(TextInputFormat.class);
        job2.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        job2.waitForCompletion(true);
    }
}

From source file:CrimenCasosTotales.java

public static void main(String args[]) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "casostotales");
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:Edge.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/* ww  w.  j av  a  2 s . co  m*/
    }

    Path tempDir = new Path("/temp/edge");

    Job job = new Job(conf, "word count");
    job.setJarByClass(Edge.class);

    job.setMapperClass(SplitMapper.class);
    job.setCombinerClass(DuplicateCombiner.class);

    //job.setSortComparatorClass(DecentComparator.class);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, tempDir);

    if (job.waitForCompletion(true)) {

        Job job2 = new Job(conf, "edge");
        job2.setJarByClass(Edge.class);
        job2.setMapperClass(SwitchMapper.class);
        job2.setSortComparatorClass(DecentComparator.class);
        job2.setReducerClass(SwitchReducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job2, tempDir);
        FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));

        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:LinkedGraph.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from   w  w w .  ja  v a2  s  .com
    }
    Job job = new Job(conf, "Graph");
    job.setJarByClass(LinkedGraph.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:MarkovStateTransitionModel.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Markov tate transition model";
    job.setJobName(jobName);//from  w  w  w . ja  va  2s .  c  o  m

    job.setJarByClass(MarkovStateTransitionModel.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    job.setMapperClass(MarkovStateTransitionModel.StateTransitionMapper.class);
    job.setReducerClass(MarkovStateTransitionModel.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:MaleUsersBelow7Years.java

public static void main(String args[]) throws Exception {
    Configuration configuration = new Configuration();

    Job job = new Job(configuration, "CountMaleUsersLessThan7");
    job.setJarByClass(MaleUsersBelow7Years.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reducer.class);
    job.setCombinerClass(Reducer.class);

    //set output and input formats;mapper-input reducer-output
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0])); //path for input file
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // Path for output file
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:PrimeDivisor.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//  w w  w.  ja  v a2  s  . c  om
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PrimeDivisor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:ComputeCooccurrenceMatrixPairs.java

License:Apache License

/**
 * Runs this tool./*from   w  ww .j av  a 2  s  . c  o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixPairs.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:GenIndex.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/*from   w  ww . j a  v  a 2  s .c om*/
    }
    String tmpPath = "/local_scratch/wordcount/tmp";
    String stopWord = "/local_scratch/wordcount/stopword";

    // Job to count the words
    Job count_job = new Job(conf, "word count");
    count_job.setJarByClass(GenIndex.class);
    count_job.setMapperClass(Mapper1_Count.class);
    count_job.setCombinerClass(Reducer1_Count.class);
    count_job.setReducerClass(Reducer1_Count.class);

    count_job.setOutputKeyClass(Text.class);
    count_job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(count_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(count_job, new Path(tmpPath));
    count_job.waitForCompletion(true);

    Job sort_job = new Job(conf, "word sort");
    sort_job.setJarByClass(GenIndex.class);
    sort_job.setMapperClass(Mapper2_Sort.class);
    sort_job.setCombinerClass(Reducer2_Sort.class);
    sort_job.setReducerClass(Reducer2_Sort.class);
    sort_job.setSortComparatorClass(SortReducerByValuesKeyComparator.class);
    sort_job.setOutputKeyClass(IntWritable.class);
    sort_job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sort_job, new Path(tmpPath));
    FileOutputFormat.setOutputPath(sort_job, new Path(stopWord));

    sort_job.waitForCompletion(true);

    // job to generate the index
    Job index_job = new Job(conf, "word index");
    index_job.setJarByClass(GenIndex.class);
    index_job.setMapperClass(Mapper3_index.class);
    index_job.setCombinerClass(Reducer3_index.class);
    index_job.setReducerClass(Reducer3_index.class);

    index_job.setOutputKeyClass(Text.class);
    index_job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(index_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(index_job, new Path(otherArgs[1]));

    index_job.waitForCompletion(true);

    System.exit(0);
}

From source file:BigramRelativeFrequencyJson.java

License:Apache License

/**
 * Runs this tool.//from w  w  w . j a  v a2 s . co  m
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int reduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool name: " + BigramRelativeFrequencyJson.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyJson.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyJson.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(MyTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(MyTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}