Example usage for org.apache.hadoop.mapred JobConf setCombinerClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf#setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> theClass) 

Document

Set the user-defined combiner class used to combine map-outputs before being sent to the reducers.
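
To show where this call fits in a complete driver, here is a minimal, self-contained word-count sketch. The WordCountDriver, TokenMapper, and SumReducer names are hypothetical, not taken from the examples below; the reducer is reused as the combiner, which is safe here because summing integer counts is associative and commutative.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

public class WordCountDriver {

    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> out,
                Reporter reporter) throws IOException {
            // emit (word, 1) for every token in the line
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                out.collect(word, ONE);
            }
        }
    }

    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            out.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountDriver.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        // Combine map outputs locally before the shuffle to cut network traffic;
        // the reducer class is reused because integer addition is associative
        // and commutative, so combining partial sums cannot change the result.
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Because the framework may invoke the combiner zero, one, or several times per map task, a class passed to setCombinerClass must produce the same final result whether or not it runs; the examples below reuse a reducer as a combiner only when that property holds.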

Usage

From source file: crunch.MaxTemperature.java

License: Apache License

/**
     * Create a JobConf for a Job that will calculate the number of unique listeners per track.
     * @param inputDir The path to the folder containing the raw listening data files.
     * @return The unique listeners JobConf.
     */
    private JobConf getUniqueListenersJobConf(Path inputDir) {
        log.info("Creating configuration for unique listeners Job");

        // output results to a temporary intermediate folder; this will be deleted by the start() method
        Path uniqueListenersOutput = new Path("uniqueListeners");

        JobConf conf = new JobConf(TrackStatisticsProgram.class);
        conf.setOutputKeyClass(IntWritable.class); // track id
        conf.setOutputValueClass(IntWritable.class); // number of unique listeners
        conf.setInputFormat(TextInputFormat.class); // raw listening data
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(UniqueListenersMapper.class);
        conf.setCombinerClass(UniqueListenersCombiner.class);
        conf.setReducerClass(UniqueListenersReducer.class);

        FileInputFormat.addInputPath(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, uniqueListenersOutput);
        conf.setJobName("uniqueListeners");
        return conf;
    }

From source file: crunch.MaxTemperature.java

License: Apache License

/**
     * Creates a JobConf for a Job that will sum up the TrackStatistics per track.
     * @param inputDir The path to the folder containing the raw input data files.
     * @return The sum JobConf.
     */
    private JobConf getSumJobConf(Path inputDir) {
        log.info("Creating configuration for sum job");
        // output results to a temporary intermediate folder; this will be deleted by the start() method
        Path playsOutput = new Path("sum");

        JobConf conf = new JobConf(TrackStatisticsProgram.class);
        conf.setOutputKeyClass(IntWritable.class); // track id
        conf.setOutputValueClass(TrackStats.class); // statistics for a track
        conf.setInputFormat(TextInputFormat.class); // raw listening data
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(SumMapper.class);
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.addInputPath(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, playsOutput);
        conf.setJobName("sum");
        return conf;
    }

From source file: crunch.MaxTemperature.java

License: Apache License

/**
     * Creates a JobConf for a Job that will merge the unique listeners and track statistics.
     * @param outputPath The path for the results to be output to.
     * @param sumInputDir The path containing the data from the sum Job.
     * @param listenersInputDir The path containing the data from the unique listeners job.
     * @return The merge JobConf.
     */
    private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) {
        log.info("Creating configuration for merge job");
        JobConf conf = new JobConf(TrackStatisticsProgram.class);
        conf.setOutputKeyClass(IntWritable.class); // track id
        conf.setOutputValueClass(TrackStats.class); // overall track statistics
        conf.setCombinerClass(SumReducer.class); // safe to re-use reducer as a combiner here
        conf.setReducerClass(SumReducer.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileOutputFormat.setOutputPath(conf, outputPath);

        MultipleInputs.addInputPath(conf, sumInputDir, SequenceFileInputFormat.class, IdentityMapper.class);
        MultipleInputs.addInputPath(conf, listenersInputDir, SequenceFileInputFormat.class,
                MergeListenersMapper.class);
        conf.setJobName("merge");
        return conf;
    }
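
As the comment in getMergeConf notes, a reducer is safe to reuse as a combiner only when its input and output key/value types match and its operation is associative and commutative, as with summing per-track statistics; otherwise combining partial results on the map side would change the final output.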

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob4.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob4.class);
    conf.setJobName(GoogleSyntacticsJob4.class.getSimpleName());

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    conf.setMapperClass(GoogleSyntacticsJob4Mapper.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(NullWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(0);
    conf.setCombinerClass(IdentityReducer.class);

    JobClient.runJob(conf);
    return 0;
}
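
Note that this job calls setNumReduceTasks(0), which makes it map-only: map output goes straight to the output format, there is no shuffle, and the combiner registered here never runs.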

From source file: de.tudarmstadt.lt.nlkg.ConvertInvertSVO.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertInvertSVO.class);
    conf.setJobName(ConvertInvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.nlkg.ConvertSVO.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertSVO.class);
    conf.setJobName(ConvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file: dinocode.SpeciesGraphBuilder.java

public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");
    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    //conf.setOutputKeyClass(Text.class); 
    //conf.setOutputValueClass(Text.class); 
    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); 
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); 
    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); 

    //conf.setInputPath(new Path("graph1")); 
    //conf.setOutputPath(new Path("graph2")); 

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000";
    for (int i = 0; i < 500; i++) {
        client = new JobClient();
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        int count = i + 1;
        outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count;
        conf.setNumReduceTasks(5);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));

        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        inFiles = outFiles;

    }

    //Viewer
    client = new JobClient();
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");

    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000";
    outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal";

    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file: edu.brown.cs.mapreduce.benchmarks.Benchmark2.java

License: Open Source License

public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
    JobConf job = base.getJobConf();

    job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    //job.setInputFormat(KeyValueSetInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    if (base.getTupleData()) {
        job.setMapperClass(Benchmark2.TupleWritableMap.class);
    } else {
        job.setMapperClass(Benchmark2.TextMap.class);
    }
    job.setCombinerClass(Benchmark2.Reduce.class);
    job.setReducerClass(Benchmark2.Reduce.class);
    //job.setNumReduceTasks(0);

    try {
        base.runJob(job);
        if (base.getCombine())
            base.runCombine();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}

From source file: edu.brown.cs.mapreduce.benchmarks.Benchmark4.java

License: Open Source License

public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
    JobConf job = base.getJobConf();

    job.setInputFormat(TextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(Benchmark4.Map.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    try {
        job.setCompressMapOutput(base.getCompress());
        base.runJob(job);

        if (base.getCombine())
            base.runCombine();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}

From source file: edu.ldzm.analysis.AnalysisSummary.java

License: Apache License

/**
 * The main driver for the analysis summary map/reduce program. Invoke this
 * method to submit the map/reduce job.
 * 
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summery");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are text
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (!param) {
        System.out.println("-l namelist.txt");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}