Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
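
The combiner runs on each mapper's output before it is shuffled to the reducers, so its input and output key/value types must both match the map output types. Reusing the reducer class as the combiner is safe only when the reduce operation is commutative and associative, as in word count. Below is a minimal sketch of a driver that wires in setCombinerClass; the class name WordCountWithCombiner is hypothetical, and TokenizerMapper / IntSumReducer are assumed to be the standard WordCount mapper and reducer classes used in the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountWithCombiner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count with combiner");
        job.setJarByClass(WordCountWithCombiner.class);
        job.setMapperClass(TokenizerMapper.class);
        // Reuse the reducer as the combiner: summing partial counts map-side is
        // safe because addition is commutative and associative, and the reducer's
        // types (Text, IntWritable) match the map output types.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}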

Usage

From source file:hadoop.twitter.mapreduce.Main.java

public static void preProcess(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Preprocess.class);
        job.setMapperClass(UserMapper.class);
        job.setCombinerClass(UserReducer.class);
        job.setReducerClass(UserReducer.class);
        //job.setNumReduceTasks(8);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // waitForCompletion(true) submits the job, prints progress, and blocks until it finishes.
        if (!job.waitForCompletion(true)) {
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, "Job {0} did not complete successfully", name);
        }
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:hadoop.twitter.mapreduce.Main.java

public static void ranking(String name, String input, String output) {
    try {
        deleteFolder(output);
        Job job = Job.getInstance(conf, "(feryandi) " + name);
        job.setJarByClass(Ranking.class);
        job.setMapperClass(RankingMapper.class);
        job.setCombinerClass(RankingReducer.class);
        job.setReducerClass(RankingReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        if (!job.waitForCompletion(true)) {
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, "Job {0} did not complete successfully", name);
        }
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:hadoop.wordcount.partitioner.combiner.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.wordcount.prefix.WordCountAddPrefix.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountAddPrefix.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.WordCountDataLuogoMeteoArrivo.java

License:Apache License

public static void main(String[] args) throws Exception {
    long start = System.nanoTime();
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountDataLuogoMeteoArrivo.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("NewDatasetMobility.txt"));
    FileSystem fs;
    try {
        fs = FileSystem.get(new Configuration());
        if (fs.exists(new Path("resultsHADOOP"))) {
            fs.delete(new Path("resultsHADOOP"), true);
            System.out.println("ELIMINATA");
        }
    } catch (Exception e) {
        System.out.println("ERROR");
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, new Path("resultsHADOOP"));
    if (job.waitForCompletion(true)) {
        System.out.println("DONE");
        long end = System.nanoTime();
        long microseconds = (end - start) / 1000;
        double seconds = (double) microseconds / 1000000;
        double minutes = (double) seconds / 60;
        System.out.println("microsecondi: " + microseconds + " ");
        System.out.println("secondi: " + seconds);
        System.out.println("minuti: " + minutes);
        System.exit(0);
    } else {
        System.exit(1);
    }
}

From source file:hdfsBasicOper.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class); // use the reducer as the combiner
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        // FileInputFormat.addInputPath adds each remaining argument as an input path.
        // The default input format is TextInputFormat, which divides the input into
        // splits and uses LineRecordReader to turn each line into a <key, value> pair.
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.out.println("over");
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:hd_knn.HD_KNN.java

public static void main(String[] args) throws Exception {

    // Arguments:
    // KNN variant: 0 standard, 1 mean, 2 inverse of the distance
    // Distance to use: 0 Euclidean, 1 Manhattan, 2 Chebyshev
    // k
    // test_file
    // Input path
    // Output path
    if (args.length != 6) {
        System.out.println("Arguments: knn_type distance k test_file input_path output_path");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    conf.set("knn_method", args[0]);
    conf.set("distance", args[1]);
    conf.setInt("k", Integer.parseInt(args[2]));
    conf.set("test_file", args[3]);
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(HD_KNN.class);
    job.setMapperClass(DistanceCalculatorMapper.class);
    job.setCombinerClass(PredictClassReducer.class);
    job.setReducerClass(PredictClassReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DistanceClassOutput.class);
    FileInputFormat.addInputPath(job, new Path(args[4]));
    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:idv.takeshi.miao.examples.WordCount.java

License:Apache License

/**
 * Extracted into a separate method to make testing easier.
 * @param conf
 * @param otherArgs
 * @return
 * @throws IOException
 * @author scott_miao
 */
public static Job createJob(Configuration conf, String[] otherArgs) throws IOException {
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    return job;
}

From source file:ilps.hadoop.bin.CountAssessments.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String in = null;
    String out = null;
    String assessmentsfile = null;

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else if ("-q".equals(args[i])) {
                assessmentsfile = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (other_args.size() > 0) {
        return printUsage();
    }

    if (in == null || out == null || assessmentsfile == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);
    LOG.info(" - assessments path: " + assessmentsfile);

    Configuration conf = getConf();
    conf.set("ASSESSMENTS_HDFS", assessmentsfile);
    Job job = new Job(conf, "Count assessments");
    job.setJarByClass(CountAssessments.class);

    // Give user classes precedence on the classpath to work around conflicts with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);

    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringLongPair.class);

    job.setCombinerClass(CountReducer.class);
    job.setReducerClass(CountReducer.class);
    job.setNumReduceTasks(1);

    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringLongPair.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:ilps.hadoop.bin.CountGenres.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String in = null;
    String out = null;

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-i".equals(args[i])) {
                in = args[++i];
            } else if ("-o".equals(args[i])) {
                out = args[++i];
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (other_args.size() > 0) {
        return printUsage();
    }

    if (in == null || out == null)
        return printUsage();

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + in);
    LOG.info(" - output path: " + out);

    Configuration conf = getConf();
    Job job = new Job(conf, "Count genres");
    job.setJarByClass(CountGenres.class);

    // Give user classes precedence on the classpath to work around conflicts with Thrift classes in the Hadoop distro.
    job.setUserClassesTakesPrecedence(true);

    job.setInputFormatClass(ThriftFileInputFormat.class);
    job.setMapperClass(MyMapper.class);
    FileInputFormat.addInputPath(job, new Path(in));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);

    FileSystem.get(conf).delete(new Path(out), true);
    TextOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;

}