Example usage for org.apache.hadoop.mapred JobConf setReducerClass

List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Document

Set the Reducer class for the job.
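
For orientation before the real-world examples below, here is a minimal, self-contained sketch of a job wired through setReducerClass, modeled on the classic word-count pattern for the old org.apache.hadoop.mapred API. The class names (WordCountJob, TokenMapper, SumReducer) are illustrative placeholders, not classes from the examples on this page.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountJob {

    // Emits (word, 1) for every token in the input line.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Sums the counts collected for each word.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountJob.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        // The call this page documents: registers the Reducer implementation.
        conf.setReducerClass(SumReducer.class);
        // A reduce operation that is associative and commutative, like this
        // sum, can usually double as the combiner.
        conf.setCombinerClass(SumReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Note that setReducerClass expects an implementation of the old-API org.apache.hadoop.mapred.Reducer interface, not the org.apache.hadoop.mapreduce.Reducer from the newer API (which is configured through Job.setReducerClass instead); this is why several examples below suppress deprecation warnings.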

Usage

From source file: ivory.ptc.AnchorTextInvertedIndex.java

License: Apache License

@Override
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), AnchorTextInvertedIndex.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 1);
    int reduceTasks = conf.getInt("Ivory.NumReduceTasks", 100);
    String weightingSchemeParameters = conf.get("Ivory.WeightingSchemeParameters");

    LOG.info("BuildAnchorTextInvertedIndex");
    LOG.info(" - input path: " + inPath);
    LOG.info(" - output path: " + outPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - weighting scheme: " + conf.get("Ivory.WeightingScheme"));
    LOG.info(" - weighting scheme parameters: " + weightingSchemeParameters);

    String[] params = weightingSchemeParameters.split(PARAMETER_SEPARATER);
    for (String param : params) {
        DistributedCache.addCacheFile(new URI(param), conf);
    }

    conf.setJobName("BuildAnchorTextInvertedIndex");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(AnchorTextTarget.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    fs.delete(outputPath);
    JobClient.runJob(conf);
    return 0;
}

From source file: ivory.ptc.driver.XMLFormatJudgments.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }
    JobConf conf = new JobConf(getConf(), XMLFormatJudgments.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    String docnoMapping = args[2];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoJudgments");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);
    FileSystem.get(conf).delete(outputPath);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);
    return 0;
}

From source file: ivory.ptc.driver.XMLFormatQueries.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), XMLFormatQueries.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoQueries");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    FileSystem.get(conf).delete(outputPath);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);
    JobClient.runJob(conf);
    return 0;
}

From source file: ivory.ptc.SortedPseudoTestCollection.java

License: Apache License

public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), SortedPseudoTestCollection.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    LOG.info("SortedPseudoTestCollection");
    LOG.info(" - Input path: " + conf.get("Ivory.InputPath"));
    LOG.info(" - Output path: " + conf.get("Ivory.OutputPath"));
    LOG.info(" - JudgmentExtractor: " + conf.get("Ivory.JudgmentExtractor"));
    LOG.info(" - JudgmentExtractorParameters: " + conf.get("Ivory.JudgmentExtractorParameters"));
    LOG.info(" - SamplingCriterion: " + conf.get("Ivory.SamplingCriterion"));
    LOG.info(" - SamplingCriterionParameters: " + conf.get("Ivory.SamplingCriterionParameters"));
    LOG.info(" - QueryScorer: " + conf.get("Ivory.QueryScorer"));

    conf.setJobName("SortedPTC");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(PseudoQuery.class);
    conf.setOutputValueClass(PseudoJudgments.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    fs.delete(outputPath);
    JobClient.runJob(conf);
    return 0;
}

From source file: job.uncombine.compressed.BigBuildInvertedIndex.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {

    //long GB = 1024 * 1024 * 1024;
    //long totalDataSize = 1 * GB;

    int reduceNumArray[] = { 9, 18 };
    int splitSizeMBArray[] = { 64, 128, 256 };
    int xmxArray[] = { 1000, 2000, 3000, 4000 };
    int xmsArray[] = { 0, 1 };
    int ismbArray[] = { 200, 400, 600, 800 };

    for (int splitIndex = 0; splitIndex < splitSizeMBArray.length; splitIndex++) {
        for (int reduceNumIndex = 0; reduceNumIndex < reduceNumArray.length; reduceNumIndex++) {
            for (int xmxIndex = 0; xmxIndex < xmxArray.length; xmxIndex++) {
                for (int xmsIndex = 0; xmsIndex < xmsArray.length; xmsIndex++) {
                    for (int ismbIndex = 0; ismbIndex < ismbArray.length; ismbIndex++) {

                        int reduceNum = reduceNumArray[reduceNumIndex];
                        int splitMB = splitSizeMBArray[splitIndex];
                        int xmx = xmxArray[xmxIndex];
                        int xms = xmsArray[xmsIndex] * xmx;
                        int ismb = ismbArray[ismbIndex];

                        JobConf conf = new JobConf(getConf(), BigBuildInvertedIndex.class);

                        conf.setLong("mapred.min.split.size", SplitTable.getMapred_min_split_size(splitMB));
                        conf.setLong("mapred.max.split.size", SplitTable.getMapred_max_split_size(splitMB));

                        //conf.setInt("my.sample.split.num", (int) (totalDataSize / (splitMB * 1024 * 1024)));

                        conf.setInt("mapred.reduce.tasks", reduceNum);
                        conf.setInt("io.sort.mb", ismb);

                        if (xms == 0)
                            conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m");
                        else
                            conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m -Xms" + xms + "m");

                        conf.setInt("child.monitor.metrics.seconds", 2);
                        conf.setInt("child.monitor.jvm.seconds", 2);
                        conf.setInt("child.monitor.jstat.seconds", 2);

                        conf.setJobName("BigBuildInvertedIndex " + splitMB + "MB "
                                + conf.get("mapred.child.java.opts") + " ismb=" + ismb + " RN=" + reduceNum);

                        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
                        if (otherArgs.length != 2) {
                            System.err.println("Usage: BigBuildInvertedIndex <in> <out>");
                            System.exit(2);
                        }

                        conf.setMapOutputKeyClass(Text.class);
                        conf.setMapOutputValueClass(PairOfInts.class);
                        conf.setOutputKeyClass(Text.class);
                        conf.setOutputValueClass(PairOfWritables.class);
                        SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);
                        conf.setOutputFormat(MapFileOutputFormat.class);

                        conf.setMapperClass(MyMapper.class);
                        // conf.setCombinerClass(IdentityReducer.class);
                        conf.setReducerClass(MyReducer.class);
                        FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
                        FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

                        FileSystem.get(conf).delete(new Path(otherArgs[1]), true);

                        try {
                            JobClient.runJob(conf);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        Thread.sleep(15000);

                    }
                }
            }
        }
    }
    return 0;
}

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFt.java

License: Apache License

/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    JobConf conf = HadoopUtil.generateJobConf(args);
    //      JobConf conf  = new JobConf(AggrPerFt.class);
    //      conf.setJobName("AggrPerFt");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /*
     * use compression
     */
    //      conf.set("mapred.output.compress", "true");
    //      conf.set("mapred.map.output.compress", "true");
    //      conf.set("mapred.map.output.compression.codec",
    //            "org.apache.hadoop.io.compress.SnappyCodec");
    //      conf.set("mapred.output.compression.codec",
    //            "org.apache.hadoop.io.compress.SnappyCodec");

    /* set the maximum number of tasks per node */
    int maptasks = 120;

    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.map.tasks", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    int reducetasks = 120;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);

    /*
     * heap size for the job
     */
    conf.set("mapred.child.java.opts", "-Xmx1500m");

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "2048");
    conf.set("mapred.job.reduce.memory.mb", "2048");

    JobClient.runJob(conf);

}

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFtUniquePositions.java

License: Apache License

/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    JobConf conf = HadoopUtil.generateJobConf(args);
    //      JobConf conf = new JobConf(AggrPerFtUniquePositions.class);
    conf.setJobName("AggrPerFtUniquePositions " + args[0] + " " + args[1]);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /*
     * use compression
     */
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.map.output.compress", "true");
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    /* set the maximum number of tasks per node */
    int maptasks = 120;

    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.map.tasks", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    int reducetasks = 60;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);

    /*
     * heap size for the job
     */
    conf.set("mapred.child.java.opts", "-Xmx1500m");

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "2048");
    conf.set("mapred.job.reduce.memory.mb", "2048");

    JobClient.runJob(conf);

}

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFtWithParams.java

License: Apache License

/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    JobConf conf = HadoopUtil.generateJobConf(args);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /* set the maximum number of tasks per node */
    int maptasks = 120;

    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.map.tasks", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    int reducetasks = 120;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);

    /*
     * heap size for the job
     */
    conf.set("mapred.child.java.opts", "-Xmx1500m");

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "2048");
    conf.set("mapred.job.reduce.memory.mb", "2048");

    JobClient.runJob(conf);

}

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerWord.java

License: Apache License

/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    JobConf conf = HadoopUtil.generateJobConf(args);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /*
     * use compression
     */
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.map.output.compress", "true");
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    /* set the maximum number of tasks per node */
    int maptasks = 120;

    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.map.tasks", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    int reducetasks = 120;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);

    /*
     * heap size for the job
     */
    conf.set("mapred.child.java.opts", "-Xmx1500m");

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "2048");
    conf.set("mapred.job.reduce.memory.mb", "2048");

    JobClient.runJob(conf);

}

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.CleanContext.java

License: Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    JobConf conf = HadoopUtil.generateJobConf(args);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);

    conf.setCombinerClass(IntSumReducer.class);
    conf.setReducerClass(IntSumReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /* number of milliseconds before killing a non-responding task */
    conf.set("mapred.task.timeout", "600000");

    /* set the DFS block size to 128 MB */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of tasks per node */
    int maptasks = 100;

    /*
     * Number of map tasks to deploy on each machine: 0.5 to 2 *
     * (cores/node).
     */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);
    /*
     * The default number of map tasks per job. Typically set to a prime
     * several times greater than number of available hosts.
     */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 120;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);
    conf.set("mapred.job.map.memory.mb", "3000");
    conf.set("mapred.job.reduce.memory.mb", "3000");
    JobClient.runJob(conf);

}