Example usage for org.apache.hadoop.mapred JobConf setCombinerClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setCombinerClass, drawn from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> theClass) 

Document

Set the user-defined combiner class used to combine map-outputs before being sent to the reducers.
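
For orientation, here is a minimal, self-contained sketch of where setCombinerClass fits in an old-API (org.apache.hadoop.mapred) word-count job. The CombinerSketch, TokenMapper, and SumReducer names are illustrative placeholders, not taken from any of the projects listed below.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class CombinerSketch {

    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Summing is associative and commutative, so the same class is
    // safe to use as both the combiner and the reducer.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(CombinerSketch.class);
        conf.setJobName("combiner-sketch");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        // The combiner runs on map output before the shuffle,
        // reducing the volume of data sent to the reducers.
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}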

Usage

From source file:proiectps.ProiectPS.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), MaxTemp.class);
    conf.setJobName("maxtemp");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
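
A note on this pattern: reusing the Reducer class as the combiner, as here, is only safe when the reduce operation is associative and commutative (taking a maximum is both) and the reducer's input and output key/value types match.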

From source file:redpoll.clusterer.kmeans.KMeansDriver.java

License:Apache License

/**
 * Run the job using supplied arguments
 * 
 * @param input the directory pathname for input points
 * @param clustersIn the directory pathname for input clusters
 * @param clustersOut the directory pathname for output clusters
 * @param measureClass the classname of the DistanceMeasure
 * @param convergenceDelta the convergence delta value
 * @return true if the iteration successfully runs
 */
private static boolean runIteration(String input, String clustersIn, String clustersOut, String measureClass,
        String convergenceDelta) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(KMeansDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(WritableVector.class);

    FileInputFormat.setInputPaths(conf, new Path(input));
    Path outPath = new Path(clustersOut);
    FileOutputFormat.setOutputPath(conf, outPath);

    conf.setMapperClass(KMeansMapper.class);
    conf.setCombinerClass(KMeansCombiner.class);
    conf.setReducerClass(KMeansReducer.class);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set(Cluster.CLUSTER_PATH_KEY, clustersIn);
    conf.set(Cluster.DISTANCE_MEASURE_KEY, measureClass);
    conf.set(Cluster.CLUSTER_CONVERGENCE_KEY, convergenceDelta);

    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    try {
        FileSystem fs = FileSystem.get(conf);
        loadClusters(clustersIn + "/part-00000", conf, fs);
        client.setConf(conf);
        JobClient.runJob(conf);
        return isConverged(clustersOut + "/part-00000", conf, fs);
    } catch (Exception e) {
        log.warn(e.toString(), e);
        // Treat a failed iteration as converged so the driver stops looping.
        return true;
    }
}
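
Unlike the examples that reuse the reducer, this driver registers a dedicated KMeansCombiner. In the usual k-means-on-MapReduce design, the combiner pre-aggregates the points assigned to each cluster on the map side, so only partial aggregates, rather than every point, are shuffled to the single reducer that recomputes the centroids.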

From source file:reverseIndexer.reverseIndexer.java

public static void startJob(String[] args) {

    try {
        /*
                
         Configuration conf = new Configuration();
         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
         if (otherArgs.length < 2) {
         System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
         System.exit(2);
         }
         Job job = new Job(conf, "reverse indexer");
         job.setJarByClass(this.getClass());
         job.setMapperClass(IndexerMapper.class);
         job.setReducerClass(IndexerReducer.class);
         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(LineRecWritable.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);
                
                
         for (int i = 1; i < otherArgs.length; i++) {
         FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
         }
         FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
         System.exit(job.waitForCompletion(true) ? 0 : 1);*/

        JobConf conf = new JobConf(reverseIndexer.class);
        conf.setJobName("reverse indexer");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Pass the class literals directly; instantiating the mapper and
        // reducer just to call getClass() (with unchecked casts) adds nothing.
        conf.setMapperClass(reverseIndexMapper.class);
        conf.setCombinerClass(reverseIndexReducer.class);
        conf.setReducerClass(reverseIndexReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Remove any existing output directory so the job can be rerun.
        outputDir.getFileSystem(conf).delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

    } catch (Exception Exp) {
        Exp.printStackTrace();

    }
}

From source file:temp.WordCount.java

License:Apache License

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:U.CC.SpeciesIterDriver2.java

public static void main(String[] args) {

    int i = 0;

    while (i < 10) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(SpeciesIterDriver2.class);
        conf.setJobName("Species Iter");

        conf.setNumReduceTasks(5);

        //~dk
        //conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); 
        //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); 

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        if (args.length < 2) {
            System.out.println("Usage: PageRankIter <input path> <output path>");
            System.exit(0);
        }

        //~dk
        //conf.setInputPath(new Path(args[0])); 
        //conf.setOutputPath(new Path(args[1])); 
        //FileInputFormat.setInputPaths(conf, new Path(args[0]));
        //FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        FileInputFormat.setInputPaths(conf, "output" + i);
        int newFileVal = i + 1;
        FileOutputFormat.setOutputPath(conf, new Path("output" + newFileVal));
        //conf.setInputPath(new Path("graph2")); 
        //conf.setOutputPath(new Path("graph3")); 

        conf.setMapperClass(SpeciesIterMapper2.class);
        conf.setReducerClass(SpeciesIterReducer2.class);
        conf.setCombinerClass(SpeciesIterReducer2.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        i++;

    }

}

From source file:ucsc.hadoop.mapreduce.apache.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        // second job
        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
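
This example chains two jobs: grep-search counts regex matches, with LongSumReducer serving as both combiner and reducer (safe because long addition is associative and commutative), and grep-sort then inverts the key/value pairs and uses a single reducer with a decreasing comparator to emit the counts in descending order.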

From source file:uk.bl.wa.hadoop.hosts.HostsReport.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);

    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    JobClient.runJob(conf);
    return 0;
}

From source file:validatenacluster.ValidateNacluster.java

public static void main(String[] args) throws Exception {

    JobConf conf = new JobConf(ValidateNacluster.class);
    conf.setJobName("Partition for Machine Count");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);

}

From source file:voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java

License:Apache License

public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        Class<? extends Reducer> combinerClass) throws IOException, URISyntaxException {
    JobConf conf = createJobConf(mapperClass, reducerClass);
    conf.setCombinerClass(combinerClass);

    return conf;
}
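
A hypothetical call site for this helper might look like the following sketch; MyMapper, MyReducer, and MyCombiner are placeholder names, not classes from the Voldemort codebase.

// Illustrative only: the three classes are assumed to exist elsewhere.
JobConf conf = createJobConf(MyMapper.class, MyReducer.class, MyCombiner.class);
conf.setJobName("build-read-only-store");
JobClient.runJob(conf);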

From source file:yangqi.hadoop.sample.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}