Example usage for org.apache.hadoop.mapred JobConf setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> theClass)

Source Link

Document

Set the user-defined combiner class used to combine map-outputs before being sent to the reducers.

Usage

From source file:edu.ldzm.average.AverageResponseTime.java

License:Apache License

/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.//from w w  w.  j  av  a 2  s . c o m
 * 
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (param != 2) {
        System.out.println("-l  -i?");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:edu.ncku.ikdd.ArtistAnalysis.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(ArtistAnalysis.class);
    conf.setJobName("artistanalysis");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);/*from  ww  w. j av  a  2  s.  c o m*/
}

From source file:edu.ncku.ikdd.DataMining.java

public static void main(String[] argv) throws Exception {
    int candidateLength = 1;
    FileSystem dfs = FileSystem.get(new Configuration());
    do {//from www  .j  ava2  s .  co  m
        JobConf countConf = new JobConf(DataMining.class);

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(IntWritable.class);

        countConf.setMapperClass(CountMap.class);
        countConf.setCombinerClass(CountCombine.class);
        countConf.setReducerClass(CountReduce.class);

        countConf.setInputFormat(TextInputFormat.class);
        countConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(countConf, new Path(argv[0]));
        FileOutputFormat.setOutputPath(countConf, new Path(count_path + String.valueOf(candidateLength)));
        countConf.setInt("minSupport", Integer.valueOf(argv[2]));
        countConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(countConf);

        ++candidateLength;

        JobConf candidateConf = new JobConf(DataMining.class);

        candidateConf.setOutputKeyClass(Text.class);
        candidateConf.setOutputValueClass(Text.class);

        candidateConf.setMapperClass(CandidateMap.class);
        candidateConf.setReducerClass(CandidateReduce.class);

        candidateConf.setInputFormat(TextInputFormat.class);
        candidateConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(candidateConf,
                new Path(count_path + String.valueOf(candidateLength - 1) + "/part-00000"));
        FileOutputFormat.setOutputPath(candidateConf,
                new Path(candidate_path + String.valueOf(candidateLength)));
        candidateConf.setInt("candidateLength", candidateLength);

        JobClient.runJob(candidateConf);

    } while (dfs.getFileStatus(new Path(candidate_path + String.valueOf(candidateLength) + "/part-00000"))
            .getLen() > 0);

    BufferedReader br;
    BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(dfs.create(new Path(argv[1] + "/part-00000"))));
    String line;
    for (int i = 1; i < candidateLength; ++i) {
        br = new BufferedReader(
                new InputStreamReader(dfs.open(new Path(count_path + String.valueOf(i) + "/part-00000"))));
        while ((line = br.readLine()) != null) {
            bw.write(line + "\n");
        }
        br.close();
    }
    bw.close();
}

From source file:edu.ncku.ikdd.TempRecord.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TempRecord.class);
    conf.setJobName("temprecord");

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);/* w  ww.j a  va 2s.  c om*/
}

From source file:edu.ncku.ikdd.TitleParser.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TitleParser.class);
    conf.setJobName("titleparser");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);/* w w  w .j  ava  2s  . c  o m*/
}

From source file:edu.ncku.ikdd.WordCount.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);/*ww  w  .j  a va  2 s. c  om*/
}

From source file:edu.ubc.mirrors.holographs.mapreduce.Driver.java

License:Open Source License

public int run(String[] args) throws Exception {
    JobConf job = new JobConf(getConf());
    job.setClassLoader(Driver.class.getClassLoader());
    job.setInputFormat(SnapshotObjectsOfTypeInputFormat.class);
    job.setMapperClass(InvokeMethodMapper.class);
    job.setCombinerClass(TextCountSumReducer.class);
    job.setReducerClass(TextCountSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.set("snapshotPath", args[0]);
    job.set("targetClassName", "org.eclipse.cdt.internal.core.dom.parser.cpp.CPPASTName");
    job.setInt("splitSize", 10000);
    job.setInt("maxNumObjects", 100000);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    String outputPath = args[1];//  www.  ja va 2 s.c o  m
    int suffix = 2;
    while (new File(outputPath).exists()) {
        outputPath = args[1] + suffix++;
    }
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    JobClient.runJob(job);
    return 0;
}

From source file:edu.uci.ics.pregelix.core.util.DataGenerator.java

License:Apache License

public static void main(String[] args) throws IOException {

    JobConf job = new JobConf(DataGenerator.class);
    FileSystem dfs = FileSystem.get(job);
    String maxFile = "/maxtemp";
    dfs.delete(new Path(maxFile), true);

    job.setJobName(DataGenerator.class.getSimpleName() + "max ID");
    job.setMapperClass(MapMaxId.class);
    job.setCombinerClass(CombineMaxId.class);
    job.setReducerClass(ReduceMaxId.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(maxFile));
    job.setNumReduceTasks(1);/*  w  w w .  j ava2 s  .  c  om*/
    JobClient.runJob(job);

    job = new JobConf(DataGenerator.class);
    job.set("hyracks.maxid.file", maxFile);
    job.setInt("hyracks.x", Integer.parseInt(args[2]));
    dfs.delete(new Path(args[1]), true);

    job.setJobName(DataGenerator.class.getSimpleName());
    job.setMapperClass(MapRecordGen.class);
    job.setReducerClass(ReduceRecordGen.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[3]));

    if (args.length > 4) {
        if (args[4].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[4].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.example.utils.FindLargest.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setCombinerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }//from  w ww.ja  v  a 2  s .  c  o m
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.example.utils.VertexAggregator.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexAggregator.class);

    job.setJobName(VertexAggregator.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setCombinerClass(CombineRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);/*  w  w w  .  ja v a2s.c  om*/
}