Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.bah.applefox.main.plugins.fulltextindex.FTLoader.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * //ww  w. j ava 2 s  .  c  o m
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {
    try {
        // Initialize variables
        FTLoader.articleFile = args[8];
        FTLoader.maxNGrams = Integer.parseInt(args[9]);
        FTLoader.stopWords = getStopWords();
        FTLoader.dTable = args[10];
        FTLoader.urlCheckedTable = args[11];
        FTLoader.divsFile = args[20];
        FTLoader.exDivs = getExDivs();

        // Give the job a name
        String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

        // Create job and set the jar
        Job job = new Job(getConf(), jobName);
        job.setJarByClass(this.getClass());

        String urlTable = args[5];

        job.setInputFormatClass(AccumuloInputFormat.class);
        InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
                new Authorizations());

        job.setMapperClass(MapperClass.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);

        job.setReducerClass(ReducerClass.class);
        job.setNumReduceTasks(Integer.parseInt(args[4]));

        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);

        AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

        job.waitForCompletion(true);

        return job.isSuccessful() ? 0 : 1;
    } catch (IOException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (InterruptedException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (ClassNotFoundException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    }
    return 1;
}

From source file:com.bah.applefox.main.plugins.imageindex.ImageLoader.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * /*  w ww  .j  a  v  a2 s  .co  m*/
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {

    checkedImages = args[18];
    hashTable = args[17];
    tagTable = args[19];
    divsFile = args[20];
    UserAgent = args[6];

    // Create the table
    AccumuloUtils.setSplitSize(args[23]);
    AccumuloUtils.connectBatchWrite(checkedImages).close();

    // Give the job a name
    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    // Create the job and set its jar
    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // Set the url table to read from
    String urlTable = args[5];

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setNumReduceTasks(Integer.parseInt(args[4]));

    job.setReducerClass(ReducerClass.class);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

    AccumuloUtils.setSplitSize(args[22]);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.CountURLs.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    mappedInput = args[12] + "From";

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), mappedInput,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, args[15]);

    job.waitForCompletion(true);//from w w w .j  av  a  2  s  .  co m

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.DampenTable.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    tablePrefix = args[13];//from   w  w  w.j a v a2s  .  c o  m
    dampeningFactor = Double.parseDouble(args[14]);

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), tablePrefix + "New",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.InitializePRTables.java

License:Apache License

public int run(String[] args) throws Exception {

    tablePrefix = args[13];/*from w ww  . j  a  va2  s  . co m*/

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "Old");

    AccumuloUtils.connectBatchWrite(tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.MRPageRanking.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    tablePrefix = args[13];/*from www .  j  a v  a2 s .c  om*/
    outboundLinks = args[15];

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);

    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzer.java

License:Open Source License

public static void main(String[] args) {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: AccessLogAnalyzer <input path> <output path>");
        System.exit(-1);//from   w  w w. ja v  a 2 s .c  o  m
    }
    String inputPath = args[0];
    String outputPath = args[1];
    try {
        Job job = new Job(conf, "AccessLogAnalyzer");
        job.setJarByClass(AccessLogAnalyzer.class);
        job.setMapperClass(AccessLogAnalyzerMapper.class);
        job.setReducerClass(AccessLogAnalyzerReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | ClassNotFoundException | InterruptedException e) {
    }
}

From source file:com.bark.hadoop.lab3.PageRank.java

@Override
public int run(String args[]) {
    String tmp = "/tmp/" + new Date().getTime();
    //        long timeStamp = new Date().getTime();
    try {/*from  w w  w. ja  va2 s.  c o m*/
        /**
         * Job 1: Parse XML input and read title,links
         */
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);
        job.setJarByClass(PageRank.class);

        // specify a mapper
        job.setMapperClass(RedLinkMapper.class);

        // specify a reducer
        job.setReducerClass(RedLinkReducer.class);

        // specify output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(XmlInputFormat.class);

        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));
        job.setOutputFormatClass(TextOutputFormat.class);

        job.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
    }
    /**
     * Job 2: Adjacency outGraph
     */
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PageRank.class);

        // specify a mapper
        job2.setMapperClass(AdjMapper.class);

        // specify a reducer
        job2.setReducerClass(AdjReducer.class);

        // specify output types
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));
        job2.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));
        job2.setOutputFormatClass(TextOutputFormat.class);

        job2.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
    }
    /**
     * Job 3: PageCount
     */
    try {
        Configuration conf3 = new Configuration();
        /**
         * Change output separator to "=" instead of default \t for this job
         */
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);
        job3.setJarByClass(PageRank.class);

        // specify a mapper
        job3.setMapperClass(PageCountMapper.class);

        // specify a reducer
        job3.setReducerClass(PageCountReducer.class);

        // specify output types
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(IntWritable.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));
        job3.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));
        job3.setOutputFormatClass(TextOutputFormat.class);

        job3.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
    }
    /**
     * Job 4: PageRank
     */
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);
            job4.setJarByClass(PageRank.class);

            // specify a mapper
            job4.setMapperClass(PageRankMapper.class);

            // specify a reducer
            job4.setReducerClass(PageRankReducer.class);

            // specify output types
            job4.setOutputKeyClass(Text.class);
            job4.setOutputValueClass(Text.class);

            // specify input and output DIRECTORIES
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));
            }
            job4.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
            job4.setOutputFormatClass(TextOutputFormat.class);
            job4.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
        }
    }
    /**
     * Job 5: Sort iteration 1 and iteration 8
     */
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();

            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
            /**
             * one reducer only
             */
            job5.setNumReduceTasks(1);
            job5.setSortComparatorClass(MyWritableComparator.class);
            job5.setJarByClass(PageRank.class);

            // specify a mapper
            job5.setMapperClass(SortMapper.class);
            job5.setMapOutputKeyClass(DoubleWritable.class);
            job5.setMapOutputValueClass(Text.class);

            // specify a reducer
            job5.setReducerClass(SortReducer.class);

            // specify output types
            job5.setOutputKeyClass(Text.class);
            job5.setOutputValueClass(DoubleWritable.class);

            // specify input and output DIRECTORIES
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));
            job5.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));
            job5.setOutputFormatClass(TextOutputFormat.class);

            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
        }
    }
    /**
     * Copy necessary output files to args[1]        /**
     * Copy necessary output files to args[1]
     */

    /**
     * Rename and copy OutLinkGraph
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy total number of pages
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 1
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 8
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    return returnCode;
}

From source file:com.basho.riak.hadoop.RiakWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    String[] keys = new String[10000];

    for (int i = 0; i < 10000; i++) {
        keys[i] = String.valueOf(i + 1000);
    }/*from   www. ja v  a 2  s. c  om*/
    Configuration conf = getConf();
    conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount"));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 11087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 12087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 13087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 14087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 15087));
    conf = RiakConfig.setOutputBucket(conf, "wordcount_out");
    conf = RiakConfig.setHadoopClusterSize(conf, 4);

    Job job = new Job(conf, "Riak-WordCount");

    job.setJarByClass(RiakWordCount.class);

    job.setInputFormatClass(RiakInputFormat.class);
    job.setMapperClass(TokenCounterMapper.class);

    job.setReducerClass(TokenCounterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(RiakOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(WordCountResult.class);

    job.setNumReduceTasks(4);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilder.java

License:Apache License

/**
 * Creates a {@link Job} based on how {@code this} {@link KafkaJobBuilder} has been configured. There are no
 * side-effects on {@code this} instance when you call this method, so you can call it multiple times.
 * //  w  w  w  . j a v a2 s .  co  m
 * @param conf
 *            the job conf.
 * @return a fully configured {@link Job}.
 * @throws Exception error
 * @throws IllegalArgumentException
 *             if any required parameters are not set.
 */
public Job configureJob(final Configuration conf) throws Exception {
    validateSettings();
    final Job job = Job.getInstance(conf, getDefaultedJobName());

    // set queue inputs
    if (getQueueMappers().size() == 1) {
        job.setInputFormatClass(KafkaInputFormat.class);
        final TopicConf topicConf = Iterables.getOnlyElement(getQueueMappers());
        KafkaInputFormat.setTopic(job, topicConf.getTopic());
        KafkaInputFormat.setConsumerGroup(job, topicConf.getConsumerGroup());
        job.setMapperClass(topicConf.getMapper());
    } else {
        job.setInputFormatClass(MultipleKafkaInputFormat.class);
        for (final TopicConf topicConf : getQueueMappers()) {
            MultipleKafkaInputFormat.addTopic(job, topicConf.getTopic(), topicConf.getConsumerGroup(),
                    topicConf.getMapper());
        }
    }

    if (getMapOutputKeyClass() != null) {
        job.setMapOutputKeyClass(getMapOutputKeyClass());
    }

    if (getMapOutputValueClass() != null) {
        job.setMapOutputValueClass(getMapOutputValueClass());
    }

    if (getReducerClass() == null) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(getReducerClass());
        job.setNumReduceTasks(getNumReduceTasks());
    }

    if (getPartitionerClass() != null) {
        job.setPartitionerClass(getPartitionerClass());
    }

    // set output
    job.setOutputFormatClass(getOutputFormatClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());
    if (getOutputFormat() == SupportedOutputFormat.TEXT_FILE) {
        TextOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    } else if (getOutputFormat() == SupportedOutputFormat.SEQUENCE_FILE) {
        SequenceFileOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    }

    if (usingS3()) {
        job.getConfiguration().set("fs.s3n.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3n.awsSecretAccessKey", getS3SecretyKey());
        job.getConfiguration().set("fs.s3.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3.awsSecretAccessKey", getS3SecretyKey());
    }

    if (isLazyOutputFormat()) {
        LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
    }

    // setup kafka input format specifics
    KafkaInputFormat.setZkConnect(job, getZkConnect());
    KafkaInputFormat.setKafkaFetchSizeBytes(job, getKafkaFetchSizeBytes());

    job.setSpeculativeExecution(false);
    job.setJarByClass(getClass());

    // memory settings for mappers
    if (!Strings.isNullOrEmpty(getTaskMemorySettings())) {
        job.getConfiguration().set("mapred.child.java.opts", getTaskMemorySettings());
    }

    return job;
}