Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Documentation

Set the combiner class for the job.
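
A combiner is an optional, map-side pre-reduce: the framework may apply it zero, one, or several times to a mapper's partial output, so the supplied Reducer must consume and produce the map output key/value types, and its operation must be associative and commutative so the final result does not depend on how often it runs. The sketch below is a minimal, self-contained word count showing a typical setCombinerClass call (the class names are illustrative, not taken from the examples that follow); summing integer counts is safe to combine, so the reducer is reused as the combiner.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CombinerSketch {

    // Emits (word, 1) for every token in the input line.
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums counts. Safe as a combiner: addition is associative and commutative,
    // and the input/output types both match the map output types.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count with combiner");
        job.setJarByClass(CombinerSketch.class);

        job.setMapperClass(WordCountMapper.class);
        // The combiner may run zero, one, or several times per map task;
        // the job must produce the same result in every case.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}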

Usage

From source file: edu.umd.cloud9.example.hbase.HBaseWordCount.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // (Re)create the output table, dropping it first if it already exists.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);
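    // Note: HBaseAdmin is the older (pre-1.0) HBase admin API; current clients obtain
    // an Admin via ConnectionFactory.createConnection(conf).getAdmin().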

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(HBaseWordCount.class.getSimpleName());
    job.setJarByClass(HBaseWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
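    // The reducer logic doubles as the combiner: partial counts are summed map-side
    // before the final reduce into HBase via MyTableReducer (set below).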
    job.setCombinerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.simple.DemoWordCountJson.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int numReduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool: " + DemoWordCountJson.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + numReduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCountJson.class.getSimpleName());
    job.setJarByClass(DemoWordCountJson.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(MyKey.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.simple.DemoWordCountTuple1.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int numReduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool: " + DemoWordCountTuple1.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + numReduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCountTuple1.class.getSimpleName());
    job.setJarByClass(DemoWordCountTuple1.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(PairOfStringInt.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.simple.DemoWordCountTuple2.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int numReduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool: " + DemoWordCountTuple2.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + numReduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCountTuple2.class.getSimpleName());
    job.setJarByClass(DemoWordCountTuple2.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(PairOfStringInt.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.simple.WordCountM.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + WordCountM.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(WordCountM.class.getSimpleName());
    job.setJarByClass(WordCountM.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.webgraph.driver.wt10g.GenericExtractLinks.java

License: Apache License

@Override
public int runTool() throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf);

    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());
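    // DistributedCache is deprecated in Hadoop 2; Job.addCacheFile(URI) is the current
    // equivalent (see the PairsPMI example below).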

    job.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    job.setNumReduceTasks(numReducers);

    job.setMapperClass(GenericExtractLinks.Map.class);
    job.setCombinerClass(GenericExtractLinks.Reduce.class);
    job.setReducerClass(GenericExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}

From source file: edu.umd.cloud9.webgraph.TrecExtractLinks.java

License: Apache License

@Override
public int runTool() throws Exception {

    Configuration conf = getConf();
    conf.set("mapred.child.java.opts", "-Xmx3072m");
    conf.setInt("mapred.task.timeout", 60000000);
    Job job = new Job(conf);

    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }

    DistributedCache.addCacheFile(new Path(mappingFile).toUri(), job.getConfiguration());

    job.setJobName("ExtractLinks");
    job.setNumReduceTasks(numReducers);

    job.setJarByClass(TrecExtractLinks.class);
    job.setMapperClass(TrecExtractLinks.Map.class);
    job.setCombinerClass(TrecExtractLinks.Reduce.class);
    job.setReducerClass(TrecExtractLinks.Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ArrayListWritable.class);

    configer.applyJobConfig(job);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    recursivelyAddInputPaths(job, inputPath);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    job.waitForCompletion(true);
    return 0;
}

From source file: edu.umd.gorden2.PairsPMI.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + PairsPMI.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    //
    // Pair job
    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI.class.getSimpleName());
    job.setJarByClass(PairsPMI.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(MyMapper.class);
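    // A dedicated combiner class is used instead of reusing the reducer: the final
    // reduce computes PMI values, which is not necessarily a valid map-side combine.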
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    //job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();

    //
    // wordcount job
    Job job2 = Job.getInstance(getConf());
    job2.setJobName("Wordcount");
    job2.setJarByClass(PairsPMI.class);
    String outputPath2 = "wc";

    // Delete the output directory if it exists already.
    Path outputDir2 = new Path(outputPath2);
    FileSystem.get(getConf()).delete(outputDir2, true);

    job2.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job2, new Path(inputPath));
    FileOutputFormat.setOutputPath(job2, new Path(outputPath2));

    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);

    job2.setMapperClass(MyMapper2.class);
    job2.setCombinerClass(MyReducer2.class);
    job2.setReducerClass(MyReducer2.class);

    // Add the wordcount output as a side file for the pair job.
    job.addCacheFile(new URI("wc/part-r-00000"));

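    // Run the wordcount job to completion first; the pair job reads its output
    // (wc/part-r-00000) from the distributed cache.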
    job2.waitForCompletion(true);
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.gorden2.RunPersonalizedPageRankBasic.java

License: Apache License

private float[] phase1(int i, int j, String basePath, int numNodes, String m) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    String in = basePath + "/iter" + formatter.format(i);
    String out = basePath + "/iter" + formatter.format(j) + "t";
    String outm = out + "-mass";

    // Count the part- files to get the number of partitions, because the directory
    // may also contain entries such as _logs or _SUCCESS.
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + numNodes);
    LOG.info("computed number of partitions: " + numPartitions);
    LOG.info("- sources: " + m);

    int numReduceTasks = numPartitions;

    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    //job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    job.getConfiguration().set("PageRankMassPath", outm);
    job.getConfiguration().setStrings("sources", m);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapClass.class);

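    // A dedicated combiner class partially aggregates PageRank mass map-side
    // before the shuffle.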
    job.setCombinerClass(CombineClass.class);

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(getConf()).delete(new Path(out), true);
    FileSystem.get(getConf()).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    String[] mm = m.split(",");
    float[] mass = new float[mm.length];
    for (int x = 0; x < mm.length; x++) {
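        // log(0) = -infinity, the identity element when accumulating probabilities in log space.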
        mass[x] = (float) StrictMath.log(0);
    }

    FileSystem fs = FileSystem.get(getConf());
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        float[] tMass = new float[mm.length];
        for (int y = 0; y < mm.length; y++)
            tMass[y] = fin.readFloat();
        mass = sumListLogProbs(mass, tMass);
        fin.close();
    }
    LOG.info("- total Mass: " + Arrays.toString(mass));
    return mass;
}

From source file: edu.umd.gorden2.StripesPMI.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + StripesPMI.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(StripesPMI.class.getSimpleName());
    job.setJarByClass(StripesPMI.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapStFW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    job.addCacheFile(new URI("wc/part-r-00000"));

    //
    // wordcount job
    Job job2 = Job.getInstance(getConf());
    job2.setJobName("Wordcount");
    job2.setJarByClass(PairsPMI.class);
    String outputPath2 = "wc";

    // Delete the output directory if it exists already.
    Path outputDir2 = new Path(outputPath2);
    FileSystem.get(getConf()).delete(outputDir2, true);

    job2.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job2, new Path(inputPath));
    FileOutputFormat.setOutputPath(job2, new Path(outputPath2));

    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);

    job2.setMapperClass(MyMapper2.class);
    job2.setCombinerClass(MyReducer2.class);
    job2.setReducerClass(MyReducer2.class);

    long startTime = System.currentTimeMillis();
    job2.waitForCompletion(true);
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}