Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.Job.setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
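
The class passed to setCombinerClass runs on the map side to pre-aggregate map output before it is shuffled to the reducers, so it must be a Reducer whose input and output key/value types both match the map output types. As a minimal sketch (not drawn from any of the source files below), a word-count style reducer of the following shape can be registered as both the combiner and the reducer:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Input types (Text, IntWritable) equal output types (Text, IntWritable),
// so the same class works for both setCombinerClass and setReducerClass.
public class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get(); // add up the partial counts collected for this key
        }
        result.set(sum);
        context.write(key, result);
    }
}

Because summation is commutative and associative, Hadoop may run the combiner zero, one, or several times on the map output without changing the final result; that is why most of the examples below pass the same IntSumReducer class to both setCombinerClass and setReducerClass.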

Usage

From source file:ece465.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Map<String, String> env = System.getenv();
    Path coreSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/core-site.xml");
    Path hdfsSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/hdfs-site.xml");
    Path yarnSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/yarn-site.xml");
    Path mapredSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/mapred-site.xml");
    conf.addResource(coreSiteXml);
    conf.addResource(hdfsSiteXml);
    conf.addResource(yarnSiteXml);
    conf.addResource(mapredSiteXml);
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    Path inputPath = new Path(otherArgs[0]);
    System.out.println(inputPath);
    Path outputPath = new Path(otherArgs[1]);
    System.out.println(outputPath);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:edu.bigdata.training.core.mapreduce.WordCount.java

public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException {

    System.out.println("arg[0]-->" + args[0]);
    System.out.println("arg[1]-->" + args[1]);

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(SimpleMapper.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class); // same class as the combiner; the raw Reducer base class would only pass values through
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Wait for the job before reading counters; calling System.exit() first
    // would make the counter output below unreachable.
    boolean success = job.waitForCompletion(true);
    System.out.println("Total Words:" + job.getCounters().findCounter(METRICS.TOTAL_WORDS).getValue());
    System.exit(success ? 0 : 1);
}

From source file:edu.bigdata.training.mrcassandra.MapReduceExample.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "MR Keying");
    job.setJarByClass(MapReduceExample.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/root/input/all-shakespeare.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/user/root/output/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:edu.buffalo.cse.dic.mapreduce.WordCount.java

License:Apache License

@Override
public Map<String, Number> start(String inputFile) {
    try {
        LinkedHashMap<String, Number> topTen = new LinkedHashMap<>();
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/hdfs-site.xml"));

        FileSystem fs = FileSystem.get(new URI("wordcount"), conf);
        fs.delete(new Path("wordcount"), true); // recursive delete; the single-argument delete(Path) is deprecated

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path("wordcount"));
        job.waitForCompletion(true);
        System.out.println("word count done");

        FileSystem fsa = FileSystem.get(new URI("wordcount"), conf);
        fsa.delete(new Path("wordcountfinal"), true);

        Job sortJob = new Job(conf, "sort reducer");
        sortJob.setJarByClass(SortReducerOutput.class);
        sortJob.setMapperClass(OutputBreaker.class);
        sortJob.setSortComparatorClass(ReverseComparator.class);
        sortJob.setReducerClass(SortByCount.class);
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(Text.class);
        sortJob.setPartitionerClass(TotalOrderPartitioner.class);
        Path partitionFile = new Path("trendcount", "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), partitionFile);
        FileInputFormat.addInputPath(sortJob, new Path("wordcount/part-r-00000"));
        FileOutputFormat.setOutputPath(sortJob, new Path("wordcountfinal"));
        sortJob.waitForCompletion(true);
        System.out.println("sort word count");

        Path output = new Path("wordcountfinal/part-r-00000");
        FileSystem fileSystem = FileSystem.get(output.toUri(), conf);
        FileStatus[] items = fileSystem.listStatus(output);
        for (FileStatus item : items) {
            InputStream stream = null;
            // ignoring files like _SUCCESS
            if (item.getPath().getName().startsWith("_")) {
                continue;
            } else {
                stream = fileSystem.open(item.getPath());
            }
            Scanner scan = new Scanner(stream).useDelimiter("\\n");
            for (int i = 0; i < 10; i++) {
                if (scan.hasNext()) {
                    String data = scan.next();
                    topTen.put(data.split("\\t")[1], Integer.parseInt(data.split("\\t")[0]));
                }
            }
        }
        return topTen;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:edu.columbia.hs2807.Sentiment.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "sentiment");

    job.setJarByClass(Sentiment.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LongArrayWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
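
This is the first example on the page where the combiner (Combine) is a different class from the reducer (Reduce): the job's final output value type is DoubleWritable, so Reduce cannot be reused as a combiner. Whatever Combine actually does, it still has to consume and emit the map output types. The skeleton below is only a hypothetical illustration of that type contract, assuming it lives inside Sentiment.java next to the Map and Reduce classes; the real aggregation logic is not shown in the source:

// Hypothetical sketch: the combiner reads and writes the map output types
// (IntWritable, LongArrayWritable) so the reducer still receives the same pairs.
public static class Combine
        extends Reducer<IntWritable, LongArrayWritable, IntWritable, LongArrayWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<LongArrayWritable> values, Context context)
            throws IOException, InterruptedException {
        for (LongArrayWritable value : values) {
            context.write(key, value); // pass-through only; no aggregation is attempted here
        }
    }
}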

From source file:edu.cooper.cloud.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    // use IntSumReducer as both the combiner and the reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
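
Unlike the main-method examples above, this one is a Tool-style run(String[]) method. A minimal sketch of how such a method is typically invoked, assuming MultiFileWordCount extends Configured and implements Tool, would be:

// ToolRunner parses the generic Hadoop options (-D, -conf, -fs, ...) and
// passes the remaining arguments on to run().
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new MultiFileWordCount(), args);
    System.exit(exitCode);
}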

From source file:edu.cooper.cloud.Normalize.java

License:Apache License

public static void main(String[] args) throws Exception {

    String input = "datasets/train_subject01.csv";
    String output = "output/trainX2.csv";

    Configuration conf = new Configuration();
    Map<String, String> env = System.getenv();
    Path coreSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/core-site.xml");
    Path hdfsSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/hdfs-site.xml");
    Path yarnSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/yarn-site.xml");
    Path mapredSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/mapred-site.xml");
    conf.addResource(coreSiteXml);
    conf.addResource(hdfsSiteXml);
    conf.addResource(yarnSiteXml);
    conf.addResource(mapredSiteXml);

    //        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //        if (otherArgs.length != 2) {
    //            System.err.println("Usage: wordcount <in> <out>");
    //            System.exit(2);
    //        }

    Job job = new Job(conf, "normalize");
    job.setJarByClass(Normalize.class);
    job.setMapperClass(NormalizeMapper.class);
    job.setCombinerClass(NormalizeCombiner.class);
    job.setReducerClass(NormalizeReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleArrayWritable.class);
    //        job.setInputFormatClass(new FileInputFormat<IntWritable,DoubleArrayWritable>());

    Path inputPath = new Path(input);
    System.out.println(inputPath);
    Path outputPath = new Path(output);
    System.out.println(outputPath);

    NLineInputFormat.addInputPath(job, inputPath);
    //        FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    //        Use means and std dev to normalize the data

}

From source file:edu.indiana.d2i.htrc.kmeans.MemCachedKMeansDriver.java

License:Apache License

/**
 * Run the job using the supplied arguments.
 * 
 * @param conf
 *            the job configuration
 * @param input
 *            the directory pathname for input points
 * @param clustersOut
 *            the directory pathname for output clusters
 * @param measureClass
 *            the classname of the DistanceMeasure
 * @param convergenceDelta
 *            the convergence delta value
 * 
 * @return true if the iteration successfully runs
 */
private static boolean runIteration(Configuration conf, Path input, Path clustersOut, String measureClass,
        String convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, convergenceDelta);

    Job job = new Job(conf, "KMeans Driver running runIteration ");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ClusterObservations.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Cluster.class);

    //      job.setInputFormatClass(SequenceFileInputFormat.class);
    //      job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setInputFormatClass(MemIDInputFormat.class);
    job.setOutputFormatClass(MemCachedOutputFormat.class);
    job.setMapperClass(MemKMeansMapper.class);
    job.setCombinerClass(KMeansCombiner.class);
    job.setReducerClass(MemKMeansReducer.class);

    FileInputFormat.addInputPath(job, input); // input is id list
    FileOutputFormat.setOutputPath(job, clustersOut);

    job.setJarByClass(MemCachedKMeansDriver.class);
    HadoopUtil.delete(conf, clustersOut);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("K-Means Iteration failed processing ");
    }

    return isConverged(conf);
}

From source file:edu.isi.mavuno.app.ie.HarvestSAPInstances.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.CorpusClass", conf);
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.MinMatches", conf));
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.OutputPath", conf);

    sLogger.info("Tool name: HarvestSAPInstances");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestSAPInstances");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);
    return 0;
}

From source file:edu.isi.mavuno.app.mine.HarvestContextPatternPairs.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.ExtractorClass",
            conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.ExtractorArgs",
            conf);
    String minMatches = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.MinMatches", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.OutputPath", conf);

    sLogger.info("Tool name: HarvestContextPatternPairs");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Min matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestContextPatternPairs");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    return 0;
}