Example usage for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Document

Set the Reducer class for the job.
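
For orientation, here is a minimal sketch of how setReducerClass typically fits into a JobConf-based job setup. MyJob, MyMapper, and MyReducer are hypothetical placeholder classes, not taken from the examples below:

public static void runExample() throws Exception {
    // Configure a classic mapred-API job.
    JobConf conf = new JobConf(MyJob.class);
    conf.setJobName("example");

    conf.setMapperClass(MyMapper.class);
    // The method documented on this page: assign the Reducer implementation.
    conf.setReducerClass(MyReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    JobClient.runJob(conf);
}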

Usage

From source file: SleepJob.java

License: Apache License

public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setSpeculativeExecution(false);
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}

From source file: update_sentiment.java

License: LGPL

public static void runjob(String input, String output) throws Exception {
    JobConf conf = new JobConf(update_sentiment.class);
    conf.setJobName("Update_Sentiment_Train");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);
}

From source file: BMTKeyValueLoader.java

License: Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), CassandraTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("BMTKeyValueLoader");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file: HoopRemoteTask.java

License: Open Source License

/**
 *
 */
public static void main(String args[]) throws Exception {
    // run the HoopLink constructor; We need this to have a global settings registry       
    @SuppressWarnings("unused")
    HoopLink link = new HoopLink();

    dbg("main ()");

    showTimeStamp();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */

    HoopPerformanceMeasure metrics = new HoopPerformanceMeasure();
    metrics.setMarker("main");
    HoopLink.metrics.getDataSet().add(metrics);

    if (parseArgs(args) == false) {
        usage();
        return;
    }

    if (HoopLink.postonly == true) {
        postOnly();
        return;
    }

    if (HoopLink.task.equals("none") == true) {
        dbg("No task defined, please use the commandline option -task <task>");
        return;
    }

    dbg("Starting system ...");

    HoopRemoteTask driver = new HoopRemoteTask();

    if (HoopLink.useHadoop == false) {
        dbg("Starting built-in mapper ...");

        driver.indexDocuments();
    } else {
        dbg("Starting hadoop job ...");

        Configuration conf = new Configuration();

        // TRANSFER SETTINGS FROM HoopLink to Configuration!!!

        transferConf(conf);

        // Now we're feeling much better

        HoopRemoteTask.hdfs = FileSystem.get(conf);

        if (HoopLink.dbglocal == true) {
            dbg("Enabling local debugging ...");
            conf.set("mapred.job.tracker", "local");
        } else
            dbg("Disabling local debugging");

        JobConf job = new JobConf(conf, HoopRemoteTask.class);

        job.setJobName(driver.getClassName());

        driver.setJob(job);

        @SuppressWarnings("unused")
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(HoopRemoteTask.class);

        if (HoopLink.task.equals("invert") == true) {
            dbg("Configuring job for invert task ...");

            job.setReducerClass(HoopInvertedListReducer.class);
            job.setMapperClass(HoopInvertedListMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
        }

        if (HoopLink.task.equals("wordcount") == true) {
            dbg("Configuring job for wordcount task ...");

            job.setReducerClass(HoopWordCountReducer.class);
            job.setMapperClass(HoopWordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
        }

        dbg("Using input path: " + HoopLink.datapath);
        dbg("Using output path: " + HoopLink.outputpath);

        FileInputFormat.addInputPath(job, new Path(HoopLink.datapath));
        FileOutputFormat.setOutputPath(job, new Path(HoopLink.outputpath));

        job.setInputFormat(HoopWholeFileInputFormat.class);

        if ((HoopLink.shardcreate.equals("mos") == true) && (HoopLink.nrshards > 1)) {
            dbg("Setting output to sharded output streams class ...");

            job.setOutputFormat(HoopShardedOutputFormat.class);
        } else
            job.setOutputFormat(TextOutputFormat.class);

        /**
         * Temporarily commented out for testing purposes
         */

        //job.setPartitionerClass (HoopPartitioner.class);                      

        driver.register("Main");

        JobClient.runJob(job);

        postProcess(conf);
    }

    showTimeStamp();

    metrics.closeMarker();
    long timeTaken = metrics.getYValue();
    //long timeTaken=metrics.getMarkerRaw ();
    metrics.printMetrics(timeTaken);

    driver.unregister();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */
    //stats.calcStatistics();
    //dbg (stats.printStatistics());
}

From source file: NaivePageRank.java

License: Apache License

public static void main(String[] args) throws Exception {
    int iteration = -1;
    String inputPath = args[0];
    String outputPath = args[1];
    int specIteration = 0;
    if (args.length > 2) {
        specIteration = Integer.parseInt(args[2]);
    }
    int numNodes = 100000;
    if (args.length > 3) {
        numNodes = Integer.parseInt(args[3]);
    }
    int numReducers = 32;
    if (args.length > 4) {
        numReducers = Integer.parseInt(args[4]);
    }
    System.out.println("specified iteration: " + specIteration);
    long start = System.currentTimeMillis();

    /**
     * job to count out-going links for each url
     */
    JobConf conf = new JobConf(NaivePageRank.class);
    conf.setJobName("PageRank-Count");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(CountMapper.class);
    conf.setReducerClass(CountReducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/count"));
    conf.setNumReduceTasks(numReducers);
    JobClient.runJob(conf);

    /******************** Initial Rank Assignment Job ***********************/
    conf = new JobConf(NaivePageRank.class);
    conf.setJobName("PageRank-Initialize");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(InitialRankAssignmentMapper.class);
    conf.setReducerClass(InitialRankAssignmentReducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
    conf.setNumReduceTasks(numReducers);
    // conf.setIterative(false);
    JobClient.runJob(conf);
    iteration++;

    do {
        /****************** Join Job ********************************/
        conf = new JobConf(NaivePageRank.class);
        conf.setJobName("PageRank-Join");
        conf.setOutputKeyClass(Text.class);
        // conf.setOutputValueClass(Text.class);
        conf.setMapperClass(ComputeRankMap.class);
        conf.setReducerClass(ComputeRankReduce.class);
        conf.setMapOutputKeyClass(TextPair.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setPartitionerClass(FirstPartitioner.class);
        conf.setOutputKeyComparatorClass(KeyComparator.class);
        conf.setOutputValueGroupingComparator(GroupComparator.class);

        // relation table
        FileInputFormat.setInputPaths(conf, new Path(inputPath));
        // rank table
        FileInputFormat.addInputPath(conf, new Path(outputPath + "/i" + (iteration - 1)));
        // count table
        FileInputFormat.addInputPath(conf, new Path(outputPath + "/count"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
        conf.setNumReduceTasks(numReducers);
        JobClient.runJob(conf);
        iteration++;

        /******************** Rank Aggregate Job ***********************/
        conf = new JobConf(NaivePageRank.class);
        conf.setJobName("PageRank-Aggregate");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapperClass(RankAggregateMapper.class);
        conf.setReducerClass(RankAggregateReducer.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(outputPath + "/i" + (iteration - 1)));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration));
        conf.setNumReduceTasks(numReducers);
        conf.setInt("haloop.num.nodes", numNodes);
        JobClient.runJob(conf);
        iteration++;
    } while (iteration < 2 * specIteration);

    long end = System.currentTimeMillis();
    System.out.println("running time " + (end - start) / 1000 + "s");
}

From source file: IndexWords.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        return -1;
    }

    checkWords = new String[args.length - 2];

    int numIter = 5;

    Path input = new Path(args[0]);

    for (int i = 0; i < numIter; i++) {
        JobConf conf = new JobConf(getConf(), IndexWords.class);
        conf.setJobName("indexwords");

        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(Reduce.class);

        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, new Path(args[1] + Integer.toString(i)));

        RunningJob rj = JobClient.runJob(conf);
        input = new Path(args[1] + Integer.toString(i));
        double resVal = rj.getCounters().getCounter(RecordCounters.RESIDUAL_COUNTER) * 1.0 / 10000;
        System.out.println(N + " " + (resVal / (1.0 * N)));
        if (resVal / (1.0 * N) < 0.001)
            break;
    }

    return 0;
}

From source file: ClimateData.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(ClimateData.class);
    conf.setJobName("climatedata");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

From source file: FormatStorage2ColumnStorageMR.java

License: Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatStorage2ColumnStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorage2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = null;
    rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file: BP.java

License: Apache License

protected JobConf configUpdateMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Update_message");

    conf.setMapperClass(MapUpdateMessage.class);
    conf.setReducerClass(RedUpdateMessage.class);

    fs.delete(message_next_path, true);

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, message_next_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file: BP.java

License: Apache License

protected JobConf configCheckErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Check Err");

    fs.delete(check_error_path, true);

    conf.setMapperClass(MapCheckErr.class);
    conf.setReducerClass(RedCheckErr.class);

    FileInputFormat.setInputPaths(conf, message_cur_path, message_next_path);
    FileOutputFormat.setOutputPath(conf, check_error_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}