Example usage for org.apache.hadoop.mapred JobConf setCombinerClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setCombinerClass, drawn from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> theClass) 

Document

Set the user-defined combiner class used to combine map-outputs before being sent to the reducers.
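
For orientation, here is a minimal, self-contained sketch of where setCombinerClass fits in an old-API (org.apache.hadoop.mapred) word-count job. The CombinerSketch, TokenMapper, and SumReducer names are illustrative placeholders, not taken from any of the projects listed below.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class CombinerSketch {

    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Summing is associative and commutative, so the same class is
    // safe to use as both the combiner and the reducer.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(CombinerSketch.class);
        conf.setJobName("combiner-sketch");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        // The combiner runs on map output before the shuffle,
        // reducing the volume of data sent to the reducers.
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}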

Usage

From source file:proiectps.ProiectPS.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), MaxTemp.class);
    conf.setJobName("maxtemp");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
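
A note on this pattern: reusing the Reducer class as the combiner, as here, is only safe when the reduce operation is associative and commutative (taking a maximum is both) and the reducer's input and output key/value types match.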

From source file:redpoll.clusterer.kmeans.KMeansDriver.java

License:Apache License

/**
 * Run the job using supplied arguments
 * 
 * @param input the directory pathname for input points
 * @param clustersIn the directory pathname for input clusters
 * @param clustersOut the directory pathname for output clusters
 * @param measureClass the classname of the DistanceMeasure
 * @param convergenceDelta the convergence delta value
 * @return true if the iteration successfully runs
 */
private static boolean runIteration(String input, String clustersIn, String clustersOut, String measureClass,
        String convergenceDelta) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(KMeansDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(WritableVector.class);

    FileInputFormat.setInputPaths(conf, new Path(input));
    Path outPath = new Path(clustersOut);
    FileOutputFormat.setOutputPath(conf, outPath);

    conf.setMapperClass(KMeansMapper.class);
    conf.setCombinerClass(KMeansCombiner.class);
    conf.setReducerClass(KMeansReducer.class);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set(Cluster.CLUSTER_PATH_KEY, clustersIn);
    conf.set(Cluster.DISTANCE_MEASURE_KEY, measureClass);
    conf.set(Cluster.CLUSTER_CONVERGENCE_KEY, convergenceDelta);

    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    try {
        FileSystem fs = FileSystem.get(conf);
        loadClusters(clustersIn + "/part-00000", conf, fs);
        client.setConf(conf);
        JobClient.runJob(conf);
        return isConverged(clustersOut + "/part-00000", conf, fs);
    } catch (Exception e) {
        log.warn(e.toString(), e);
        // Treat a failed iteration as converged so the driver stops looping.
        return true;
    }
}
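
Unlike the examples that reuse the reducer, this driver registers a dedicated KMeansCombiner. In the usual k-means-on-MapReduce design, the combiner pre-aggregates the points assigned to each cluster on the map side, so only partial aggregates, rather than every point, are shuffled to the single reducer that recomputes the centroids.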

From source file:reverseIndexer.reverseIndexer.java

public static void startJob(String[] args) {

    try {
        /*
                
         Configuration conf = new Configuration();
         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
         if (otherArgs.length < 2) {
         System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
         System.exit(2);
         }
         Job job = new Job(conf, "reverse indexer");
         job.setJarByClass(this.getClass());
         job.setMapperClass(IndexerMapper.class);
         job.setReducerClass(IndexerReducer.class);
         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(LineRecWritable.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);
                
                
         for (int i = 1; i < otherArgs.length; i++) {
         FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
         }
         FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
         System.exit(job.waitForCompletion(true) ? 0 : 1);*/

        JobConf conf = new JobConf(reverseIndexer.class);
        conf.setJobName("reverse indexer");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Pass the class literals directly; instantiating the mapper and
        // reducer just to call getClass() (with unchecked casts) adds nothing.
        conf.setMapperClass(reverseIndexMapper.class);
        conf.setCombinerClass(reverseIndexReducer.class);
        conf.setReducerClass(reverseIndexReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Remove any existing output directory so the job can be rerun.
        outputDir.getFileSystem(conf).delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

    } catch (Exception Exp) {
        Exp.printStackTrace();

    }
}

From source file:temp.WordCount.java

License:Apache License

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:U.CC.SpeciesIterDriver2.java

public static void main(String[] args) {

    int i = 0;

    while (i < 10) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(SpeciesIterDriver2.class);
        conf.setJobName("Species Iter");

        conf.setNumReduceTasks(5);

        //~dk
        //conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); 
        //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); 

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        if (args.length < 2) {
            System.out.println("Usage: PageRankIter <input path> <output path>");
            System.exit(0);
        }

        //~dk
        //conf.setInputPath(new Path(args[0])); 
        //conf.setOutputPath(new Path(args[1])); 
        //FileInputFormat.setInputPaths(conf, new Path(args[0]));
        //FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        FileInputFormat.setInputPaths(conf, "output" + i);
        int newFileVal = i + 1;
        FileOutputFormat.setOutputPath(conf, new Path("output" + newFileVal));
        //conf.setInputPath(new Path("graph2")); 
        //conf.setOutputPath(new Path("graph3")); 

        conf.setMapperClass(SpeciesIterMapper2.class);
        conf.setReducerClass(SpeciesIterReducer2.class);
        conf.setCombinerClass(SpeciesIterReducer2.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        i++;

    }

}

From source file:ucsc.hadoop.mapreduce.apache.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        // second job
        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
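
This example chains two jobs: grep-search counts regex matches, with LongSumReducer serving as both combiner and reducer (safe because long addition is associative and commutative), and grep-sort then inverts the key/value pairs and uses a single reducer with a decreasing comparator to emit the counts in descending order.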

From source file:uk.bl.wa.hadoop.hosts.HostsReport.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);

    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    JobClient.runJob(conf);
    return 0;
}

From source file:validatenacluster.ValidateNacluster.java

public static void main(String[] args) throws Exception {

    JobConf conf = new JobConf(ValidateNacluster.class);
    conf.setJobName("Partition for Machine Count");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);

}

From source file:voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java

License:Apache License

public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        Class<? extends Reducer> combinerClass) throws IOException, URISyntaxException {
    JobConf conf = createJobConf(mapperClass, reducerClass);
    conf.setCombinerClass(combinerClass);

    return conf;
}
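
A hypothetical call site for this helper might look like the following sketch; MyMapper, MyReducer, and MyCombiner are placeholder names, not classes from the Voldemort codebase.

// Illustrative only: the three classes are assumed to exist elsewhere.
JobConf conf = createJobConf(MyMapper.class, MyReducer.class, MyCombiner.class);
conf.setJobName("build-read-only-store");
JobClient.runJob(conf);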

From source file:yangqi.hadoop.sample.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}