List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
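setCombinerClass registers a Reducer that the framework may run over each mapper's local output before the shuffle, shrinking the data sent across the network. Because the combiner may be invoked zero, one, or several times per key, it only gives correct results for associative and commutative operations (such as summing counts), and its input and output key/value types must match the map output types. The call throws IllegalStateException if the job has already been submitted. A minimal sketch of the common pattern, assuming the standard WordCount TokenizerMapper and IntSumReducer that appear in the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);  // WordCount is the assumed driver class
job.setMapperClass(TokenizerMapper.class);
// Reuse the reducer as the combiner: summing is associative and commutative,
// and IntSumReducer's input types equal its output types (Text, IntWritable).
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);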
From source file:hadoop.twitter.mapreduce.Main.java
public static void preProcess(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Preprocess.class);
        job.setMapperClass(UserMapper.class);
        job.setCombinerClass(UserReducer.class);
        job.setReducerClass(UserReducer.class);
        //job.setNumReduceTasks(8);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // waitForCompletion blocks until the job finishes; it must not be called
        // twice on the same Job instance (that throws IllegalStateException).
        job.waitForCompletion(true);
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:hadoop.twitter.mapreduce.Main.java
public static void ranking(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Ranking.class);
        job.setMapperClass(RankingMapper.class);
        job.setCombinerClass(RankingReducer.class);
        job.setReducerClass(RankingReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // waitForCompletion blocks until the job finishes; it must not be called
        // twice on the same Job instance (that throws IllegalStateException).
        job.waitForCompletion(true);
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:hadoop.wordcount.partitioner.combiner.WordCountAddPartitioner.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.wordcount.prefix.WordCountAddPrefix.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountAddPrefix.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.WordCountDataLuogoMeteoArrivo.java
License:Apache License
public static void main(String[] args) throws Exception {
    long start = System.nanoTime();
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountDataLuogoMeteoArrivo.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("NewDatasetMobility.txt"));
    FileSystem fs;
    try {
        // Delete a previous output directory, if any, so the job can run again.
        fs = FileSystem.get(new Configuration());
        if (fs.exists(new Path("resultsHADOOP"))) {
            fs.delete(new Path("resultsHADOOP"), true);
            System.out.println("DELETED");
        }
    } catch (Exception e) {
        System.out.println("ERROR");
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, new Path("resultsHADOOP"));
    if (job.waitForCompletion(true)) {
        System.out.println("DONE");
        long end = System.nanoTime();
        long microseconds = (end - start) / 1000;
        double seconds = (double) microseconds / 1000000;
        double minutes = seconds / 60;
        System.out.println("microseconds: " + microseconds);
        System.out.println("seconds: " + seconds);
        System.out.println("minutes: " + minutes);
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:hdfsBasicOper.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // The combiner runs a local reduce over each mapper's output before the shuffle.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        // FileInputFormat.addInputPath adds an input path to the job. The default
        // TextInputFormat splits the input, and its LineRecordReader turns each
        // line into a <key, value> pair for the mapper.
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.out.println("over");
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hd_knn.HD_KNN.java
public static void main(String[] args) throws Exception {
    // Arguments:
    //   knn_type:    KNN variant - 0 standard, 1 mean, 2 inverse of the distance
    //   distance:    distance to use - 0 Euclidean, 1 Manhattan, 2 Chebyshev
    //   k
    //   test_file
    //   input_path
    //   output_path
    if (args.length != 6) {
        System.out.println("Arguments: knn_type distance k test_file input_path output_path");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("knn_method", args[0]);
    conf.set("distance", args[1]);
    conf.setInt("k", Integer.parseInt(args[2]));
    conf.set("test_file", args[3]);
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(HD_KNN.class);
    job.setMapperClass(DistanceCalculatorMapper.class);
    job.setCombinerClass(PredictClassReducer.class);
    job.setReducerClass(PredictClassReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DistanceClassOutput.class);
    FileInputFormat.addInputPath(job, new Path(args[4]));
    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:idv.takeshi.miao.examples.WordCount.java
License:Apache License
/**
 * Extracted into its own method so it can be tested easily.
 * @param conf
 * @param otherArgs
 * @return the configured word count job
 * @throws IOException
 * @author scott_miao
 */
public static Job createJob(Configuration conf, String[] otherArgs) throws IOException {
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    return job;
}
From source file:ilps.hadoop.bin.CountAssessments.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String in = null;
    String out = null;
    String assessmentsfile = null;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {
                assessmentsfile = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (other_args.size() > 0) {
        return printUsage();
    }
    if (in == null || out == null || assessmentsfile == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);
    LOG.info(" - assessments path: " + assessmentsfile);

    Configuration conf = getConf();
    conf.set("ASSESSMENTS_HDFS", assessmentsfile);
    Job job = new Job(conf, "Count assessments");
    job.setJarByClass(CountAssessments.class);
    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringLongPair.class);
    job.setCombinerClass(CountReducer.class);
    job.setReducerClass(CountReducer.class);
    job.setNumReduceTasks(1);
    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringLongPair.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:ilps.hadoop.bin.CountGenres.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String in = null;
    String out = null;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (other_args.size() > 0) {
        return printUsage();
    }
    if (in == null || out == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);

    Configuration conf = getConf();
    Job job = new Job(conf, "Count genres");
    job.setJarByClass(CountGenres.class);
    // some weird issues with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);
    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true) ? 0 : 1;
}