Example usage for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass)

Source Link

Document

Set the Reducer class for the job.

Usage

From source file:com.mycompany.wordcount.WCMain.java

/**
 * Configures the "WordCount" job and runs it to completion.
 *
 * @param args {@code args[0]} is added as an input path and {@code args[1]} is used as the
 *             output path
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws Exception declared by the signature; nothing is caught here
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf job = new JobConf(WCMain.class);
    job.setJobName("WordCount");

    // Map and reduce implementations.
    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);

    // Types of the job's output key/value pairs.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Text in, text out.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    final Path input = new Path(args[0]);
    FileInputFormat.addInputPath(job, input);
    final Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, output);

    JobClient.runJob(job);
    return 0;
}

From source file:com.ostor.dedup.hadoop.DedupStorHadoopCreateObjectsMapReduce.java

License:Open Source License

/**
 * Entry point of the "dedup-create-objects" job: sets up log4j, builds the job configuration
 * and runs the job.
 *
 * @param args {@code args[0]} is the parent directory under which the input, segment-store and
 *             object-store paths are resolved
 * @throws Exception declared by the signature; nothing is caught here
 */
public static void main(String[] args) throws Exception {
    System.out.println("NOTE: Setting up logs from conf file - " + DedupStor.DEFAULT_LOG4J_FILE);
    PropertyConfigurator.configure(DedupStor.DEFAULT_LOG4J_FILE);

    final JobConf job = new JobConf(DedupStorHadoopCreateObjectsMapReduce.class);
    job.setJobName("dedup-create-objects");

    // Mapper side: implementation and intermediate key/value types.
    job.setMapperClass(DedupStorHadoopCreateObjectsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DedupObjectSegmentWritable.class);

    // Reducer side: implementation and final key/value types.
    job.setReducerClass(DedupStorHadoopCreateObjectsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    // Every location is a child of args[0]. The input path and the object-map path are built
    // from the same suffix constant, so they denote the same location.
    final String parentDir = args[0];
    final Path inputPath = new Path(parentDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);
    final Path segmentStorPath = new Path(parentDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_SEGMENTS_LOC_SUFFIX);
    final Path objectStorPath = new Path(parentDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_LOC_SUFFIX);
    final Path objectMapPath = new Path(parentDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);

    // Publish the locations through the job configuration.
    job.set(DedupStorHadoopUtils.HADOOP_CONF_SEGMENTS_STOR_PATH_KEY, segmentStorPath.toString());
    job.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_STOR_PATH_KEY, objectStorPath.toString());
    job.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_TMP_PATH_KEY, objectMapPath.toString());

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, objectStorPath);

    JobClient.runJob(job);
}

From source file:com.ostor.dedup.hadoop.DedupStorHadoopCreateSegmentsMapReduce.java

License:Open Source License

/**
 * Entry point of the "dedup-create-segments" job: sets up log4j, builds the job configuration
 * and runs the job.
 *
 * @param args {@code args[0]} is the input directory; {@code args[1]} is the parent directory
 *             under which the segment-store and object-map paths are resolved
 * @throws Exception declared by the signature; nothing is caught here
 */
public static void main(String[] args) throws Exception {
    System.out.println("NOTE: Setting up logs from conf file - " + DedupStor.DEFAULT_LOG4J_FILE);
    PropertyConfigurator.configure(DedupStor.DEFAULT_LOG4J_FILE);

    final JobConf job = new JobConf(DedupStorHadoopCreateSegmentsMapReduce.class);
    job.setJobName("dedup-create-segments");

    // Mapper side: implementation and intermediate key/value types.
    job.setMapperClass(DedupStorHadoopCreateSegmentsMapper.class);
    job.setMapOutputKeyClass(DedupHashWritable.class);
    job.setMapOutputValueClass(DedupObjectSegmentCompleteWritable.class);

    // Reducer side: implementation and final key/value types.
    job.setReducerClass(DedupStorHadoopCreateSegmentsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DedupObjectSegmentWritable.class);

    job.setInputFormat(DedupObjectInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    final String inputDir = args[0];
    logger.info("Set input dir - " + inputDir);
    final String outputDir = args[1];
    logger.info("Set output dir - " + outputDir);

    final Path inputPath = new Path(inputDir);
    final Path segmentStorPath = new Path(outputDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_SEGMENTS_LOC_SUFFIX);
    final Path objectMapPath = new Path(outputDir,
            DedupStorHadoopUtils.DEFAULT_DEDUP_STOR_HADOOP_OBJECTS_TMP_PATH);

    // Publish the locations through the job configuration.
    job.set(DedupStorHadoopUtils.HADOOP_CONF_SEGMENTS_STOR_PATH_KEY, segmentStorPath.toString());
    job.set(DedupStorHadoopUtils.HADOOP_CONF_OBJECTS_TMP_PATH_KEY, objectMapPath.toString());

    // The job's own output lands in the object-map location.
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, objectMapPath);

    JobClient.runJob(job);
}

From source file:com.pegasus.ResultInfo.java

License:Apache License

/**
 * Builds the configuration of the "ConCmpt_Stage1" job.
 * <p>
 * The job reads {@code edge_path} and {@code curbm_path}, writes to {@code tempbm_path} and
 * uses {@code nreducers} reduce tasks.
 *
 * @return the populated job configuration; the job is not submitted here
 * @throws Exception declared by the signature; nothing is caught here
 */
protected JobConf configStage1() throws Exception {
    final JobConf stage = new JobConf(getConf(), ConCmpt.class);
    stage.setJobName("ConCmpt_Stage1");

    // Settings passed to the tasks through the configuration.
    stage.set("cur_iter", "" + cur_iter);
    stage.set("make_symmetric", "" + make_symmetric);

    stage.setMapperClass(MapStage1.class);
    stage.setReducerClass(RedStage1.class);
    stage.setNumReduceTasks(nreducers);

    stage.setOutputKeyClass(Text.class);
    stage.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(stage, edge_path, curbm_path);
    FileOutputFormat.setOutputPath(stage, tempbm_path);

    return stage;
}

From source file:com.pegasus.ResultInfo.java

License:Apache License

/**
 * Builds the configuration of the "ConCmpt_Stage2" job.
 * <p>
 * The job reads {@code tempbm_path}, writes to {@code nextbm_path}, uses {@code nreducers}
 * reduce tasks and installs {@code CombinerStage2} as its combiner.
 *
 * @return the populated job configuration; the job is not submitted here
 * @throws Exception declared by the signature; nothing is caught here
 */
protected JobConf configStage2() throws Exception {
    final JobConf stage = new JobConf(getConf(), ConCmpt.class);
    stage.setJobName("ConCmpt_Stage2");

    // Settings passed to the tasks through the configuration.
    stage.set("cur_iter", "" + cur_iter);
    stage.set("make_symmetric", "" + make_symmetric);

    stage.setMapperClass(MapStage2.class);
    stage.setCombinerClass(CombinerStage2.class);
    stage.setReducerClass(RedStage2.class);
    stage.setNumReduceTasks(nreducers);

    stage.setOutputKeyClass(Text.class);
    stage.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(stage, tempbm_path);
    FileOutputFormat.setOutputPath(stage, nextbm_path);

    return stage;
}

From source file:com.pegasus.ResultInfo.java

License:Apache License

/**
 * Builds the configuration of the "ConCmpt_Stage3" job.
 * <p>
 * The job reads {@code nextbm_path} and writes to {@code output_path}. {@code RedStage3} is
 * used both as the reducer and as the combiner.
 *
 * @return the populated job configuration; the job is not submitted here
 * @throws Exception declared by the signature; nothing is caught here
 */
protected JobConf configStage3() throws Exception {
    final JobConf stage = new JobConf(getConf(), ConCmpt.class);
    stage.setJobName("ConCmpt_Stage3");

    stage.setMapperClass(MapStage3.class);
    stage.setCombinerClass(RedStage3.class);
    stage.setReducerClass(RedStage3.class);
    stage.setNumReduceTasks(1); // This is necessary.

    stage.setOutputKeyClass(Text.class);
    stage.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(stage, nextbm_path);
    FileOutputFormat.setOutputPath(stage, output_path);

    return stage;
}

From source file:com.qfa.WordCount.java

License:Apache License

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker./*from w  ww .j a v  a  2s . co m*/
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.rapleaf.hank.hadoop.HadoopDomainBuilder.java

License:Apache License

/**
 * Assembles the job configuration for a domain build.
 *
 * @param inputPath        input path(s) handed to {@code FileInputFormat.setInputPaths}
 * @param inputFormatClass input format of the job
 * @param mapperClass      mapper of the job
 * @param versionNumber    version used to obtain the temporary output path and passed on to
 *                         {@code properties.setJobConfProperties}
 * @param properties       supplies the output format class and the temporary output path, and
 *                         applies its own settings to the configuration as the last step
 * @return the populated job configuration; the job is not submitted here
 */
public static final JobConf createJobConfiguration(String inputPath,
        Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass, int versionNumber,
        DomainBuilderProperties properties) {
    final JobConf jobConf = new JobConf();

    // Input side.
    jobConf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(jobConf, inputPath);

    // Map phase and its intermediate key/value types.
    jobConf.setMapperClass(mapperClass);
    jobConf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    jobConf.setMapOutputValueClass(ValueWritable.class);

    // Partitioning of the map output.
    jobConf.setPartitionerClass(DomainBuilderPartitioner.class);

    // Reduce phase and its final key/value types.
    jobConf.setReducerClass(DomainBuilderReducer.class);
    jobConf.setOutputKeyClass(KeyAndPartitionWritable.class);
    jobConf.setOutputValueClass(ValueWritable.class);

    // Output side: format, temporary output location and committer.
    jobConf.setOutputFormat(properties.getOutputFormatClass());
    final Path tmpOutputPath = new Path(properties.getTmpOutputPath(versionNumber));
    FileOutputFormat.setOutputPath(jobConf, tmpOutputPath);
    jobConf.setOutputCommitter(DomainBuilderOutputCommitter.class);

    // Hank specific configuration goes last.
    properties.setJobConfProperties(jobConf, versionNumber);
    return jobConf;
}

From source file:com.ricemap.spateDB.operations.FileMBR.java

License:Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * that does the thing/*from ww w  . j  a  v  a 2s .c o  m*/
 * @param conf
 * @param fs
 * @param file
 * @return
 * @throws IOException 
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }
    JobConf job = new JobConf(FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();

        // Read job result
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}

From source file:com.ricemap.spateDB.operations.LineRandomizer.java

License:Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * that does the thing/*from w w  w .ja v a  2s .  com*/
 * @param conf
 * @param infs
 * @param infile
 * @return
 * @throws IOException 
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
    JobConf job = new JobConf(LineRandomizer.class);

    FileSystem outfs = outfile.getFileSystem(job);

    if (overwrite)
        outfs.delete(outfile, true);

    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    FileSystem infs = infile.getFileSystem(job);
    int numOfPartitions = (int) Math
            .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
    job.setInt(NumOfPartitions, numOfPartitions);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}