Example usage for org.apache.hadoop.mapred JobConf setMapRunnerClass

List of usage examples for org.apache.hadoop.mapred JobConf setMapRunnerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setMapRunnerClass.

Prototype

public void setMapRunnerClass(Class<? extends MapRunnable> theClass) 

Source Link

Document

Expert: Set the MapRunnable class for the job.

Usage

From source file: sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java

License:Apache License

/**
 * Runs this tool: configures a MapReduce job over the postings input and
 * submits it through {@link JobClient#runJob(JobConf)}.
 *
 * <p>The forward index location is handed to the job through the
 * {@code ForwardIndexPath} configuration property; the job itself uses
 * {@code NullOutputFormat}, so nothing is emitted through the regular
 * MapReduce output channel.
 *
 * @param args exactly two entries: {@code args[0]} is the postings (input)
 *             path and {@code args[1]} is the forward index path
 * @return {@code -1} if the argument count is wrong or the input path does
 *         not exist; {@code 0} if the forward index path already exists
 *         (the job is skipped) or once {@code JobClient.runJob} has returned
 * @throws Exception if a file system call or the job submission fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildTweetsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    final int mapTasks = 10;
    sLogger.info("Tool: TweetsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    Path inputPath = new Path(postingsPath);
    if (!fs.exists(inputPath)) {
        sLogger.info("Error: IntDocVectors don't exist!");
        // Missing input is a failure, so report it with a non-zero code
        // (same value as the usage error above) instead of signalling success.
        return -1;
    }

    // An existing forward index is never overwritten: the job is skipped
    // and the run is treated as a success.
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    // Read back by the job's tasks to locate the forward index.
    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildTweetsForwardIndex");

    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    // Heap size for the child task JVMs.
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    // A custom MapRunnable replaces the default per-record Mapper driver.
    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}