Example usage for org.apache.hadoop.mapred JobConf setJobName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setJobName.

Prototype

public void setJobName(String name)

Source Link

Document

Set the user-specified job name.

Usage

From source file:edu.umd.cloud9.collection.spinn3r.DemoCountSpinn3rEnglishPosts.java

License:Apache License

/**
 * Runs this tool./*from w w w  .ja v  a2s  .co m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    sLogger.info("input dir: " + inputPath);
    sLogger.info("output dir: " + outputPath);

    JobConf conf = new JobConf(DemoCountSpinn3rEnglishPosts.class);
    conf.setJobName("DemoCountSpinn3rEnglishPosts");

    conf.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(Spinn3rItemInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    // clean up
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return 0;
}

From source file:edu.umd.cloud9.collection.trec.BuildTrecForwardIndex.java

License:Apache License

/**
 * Runs this tool./* w  ww. ja  v a 2 s.c o  m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), BuildTrecForwardIndex.class);
    FileSystem fs = FileSystem.get(getConf());

    String collectionPath = args[0];
    String outputPath = args[1];
    String indexFile = args[2];
    String mappingFile = args[3];

    LOG.info("Tool name: " + BuildTrecForwardIndex.class.getCanonicalName());
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - mapping file: " + mappingFile);

    conf.setJobName(BuildTrecForwardIndex.class.getSimpleName());

    conf.set("mapred.child.java.opts", "-Xmx1024m");
    conf.setNumReduceTasks(1);

    if (conf.get("mapred.job.tracker").equals("local")) {
        conf.set(DOCNO_MAPPING_FILE_PROPERTY, mappingFile);
    } else {
        DistributedCache.addCacheFile(new URI(mappingFile), conf);
    }

    FileInputFormat.setInputPaths(conf, new Path(collectionPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(TrecDocumentInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Count.DOCS).getCounter();

    String inputFile = outputPath + "/" + "part-00000";

    LOG.info("Writing " + numDocs + " doc offseta to " + indexFile);
    FSLineReader reader = new FSLineReader(inputFile, fs);

    FSDataOutputStream writer = fs.create(new Path(indexFile), true);

    writer.writeUTF(edu.umd.cloud9.collection.trec.TrecForwardIndex.class.getCanonicalName());
    writer.writeUTF(collectionPath);
    writer.writeInt(numDocs);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\t");
        long offset = Long.parseLong(arr[1]);
        int len = Integer.parseInt(arr[2]);

        writer.writeLong(offset);
        writer.writeInt(len);

        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " docs");
        }
    }
    reader.close();
    writer.close();
    LOG.info(cnt + " docs total. Done!");

    if (numDocs != cnt) {
        throw new RuntimeException("Unexpected number of documents in building forward index!");
    }

    return 0;
}

From source file:edu.umd.cloud9.collection.trec.DemoCountTrecDocuments.java

License:Apache License

/**
 * Runs this tool.//from w  w  w  . j  av a 2s.c om
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    String mappingFile = args[2];

    LOG.info("Tool: " + DemoCountTrecDocuments.class.getCanonicalName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output dir: " + outputPath);
    LOG.info(" - docno mapping file: " + mappingFile);

    JobConf conf = new JobConf(getConf(), DemoCountTrecDocuments.class);
    conf.setJobName(DemoCountTrecDocuments.class.getSimpleName());

    conf.setNumReduceTasks(0);

    // Pass in the class name as a String; this is makes the mapper general in being able to load
    // any collection of Indexable objects that has docid/docno mapping specified by a DocnoMapping
    // object.
    conf.set("DocnoMappingClass", edu.umd.cloud9.collection.trec.TrecDocnoMapping.class.getCanonicalName());

    // Put the mapping file in the distributed cache so each map worker will have it.
    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(TrecDocumentInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.cloud9.collection.trec.NumberTrecDocuments.java

License:Apache License

/**
 * Runs this tool.//  w w  w .  j a  va  2 s. c o  m
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    String outputFile = args[2];

    LOG.info("Tool: " + NumberTrecDocuments.class.getCanonicalName());
    LOG.info(" - Input path: " + inputPath);
    LOG.info(" - Output path: " + outputPath);
    LOG.info(" - Output file: " + outputFile);

    JobConf conf = new JobConf(getConf(), NumberTrecDocuments.class);
    conf.setJobName(NumberTrecDocuments.class.getSimpleName());

    conf.setNumMapTasks(100); // Arbitrary number; doesn't matter.
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(TrecDocumentInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    String input = outputPath + (outputPath.endsWith("/") ? "" : "/") + "/part-00000";
    TrecDocnoMapping.writeMappingData(new Path(input), new Path(outputFile), FileSystem.get(getConf()));

    return 0;
}

From source file:edu.umd.cloud9.collection.trecweb.NumberTrecWebDocuments.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("usage: [input] [output-dir] [output-file] [num-mappers]");
        System.exit(-1);//from w ww .  j  a  v  a  2 s.  c  om
    }

    String inputPath = args[0];
    String outputPath = args[1];
    String outputFile = args[2];
    int mapTasks = Integer.parseInt(args[3]);

    LOG.info("Tool name: " + NumberTrecWebDocuments.class.getCanonicalName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - number of mappers: " + mapTasks);

    JobConf conf = new JobConf(getConf(), NumberTrecWebDocuments.class);
    conf.setJobName(NumberTrecWebDocuments.class.getSimpleName());

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    writeMappingData(new Path(outputPath + "/part-00000"), new Path(outputFile), FileSystem.get(conf));

    return 0;
}

From source file:edu.umd.cloud9.collection.trecweb.RepackGov2Documents.java

License:Apache License

/**
 * Runs this tool./*ww  w  .ja v a 2 s.c  om*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String basePath = args[0];
    String outputPath = args[1];
    String compressionType = args[2];

    if (!compressionType.equals("block") && !compressionType.equals("record")
            && !compressionType.equals("none")) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        System.exit(-1);
    }

    // this is the default block size
    int blocksize = 1000000;

    JobConf conf = new JobConf(RepackGov2Documents.class);
    conf.setJobName("RepackGov2Documents");

    sLogger.info("Tool name: RepackGov2Documents");
    sLogger.info(" - base path: " + basePath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - compression type: " + compressionType);

    if (compressionType.equals("block")) {
        sLogger.info(" - block size: " + blocksize);
    }

    int mapTasks = 10;

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(500);

    // 272
    for (int i = 0; i <= 272; i++) {
        String path = basePath + "/GX";
        String indexNum = Integer.toString(i);
        if (indexNum.length() == 1) {
            path += "00";
        }
        if (indexNum.length() == 2) {
            path += "0";
        }
        path += indexNum;
        FileInputFormat.addInputPath(conf, new Path(path));
    }

    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    if (compressionType.equals("none")) {
        SequenceFileOutputFormat.setCompressOutput(conf, false);
    } else {
        SequenceFileOutputFormat.setCompressOutput(conf, true);

        if (compressionType.equals("record")) {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
            conf.setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    conf.setInputFormat(TrecWebDocumentInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(TrecWebDocument.class);

    conf.setMapperClass(MyMapper.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.cloud9.collection.trecweb.RepackWt10gDocuments.java

License:Apache License

/**
 * Runs this tool.// w  w  w . jav a  2s  . co m
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String basePath = args[0];
    String outputPath = args[1];
    String compressionType = args[2];

    if (!compressionType.equals("block") && !compressionType.equals("record")
            && !compressionType.equals("none")) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        System.exit(-1);
    }

    // this is the default block size
    int blocksize = 1000000;

    JobConf conf = new JobConf(RepackWt10gDocuments.class);
    conf.setJobName("RepackWt10gDocuments");

    sLogger.info("Tool name: RepackWt10gDocuments");
    sLogger.info(" - base path: " + basePath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - compression type: " + compressionType);

    if (compressionType.equals("block")) {
        sLogger.info(" - block size: " + blocksize);
    }

    int mapTasks = 10;

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(50);

    for (int i = 1; i <= 104; i++) {
        String path = basePath + "/WTX";
        String indexNum = Integer.toString(i);
        if (indexNum.length() == 1) {
            path += "00";
        }
        if (indexNum.length() == 2) {
            path += "0";
        }
        path += indexNum;
        FileInputFormat.addInputPath(conf, new Path(path));
    }

    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    if (compressionType.equals("none")) {
        SequenceFileOutputFormat.setCompressOutput(conf, false);
    } else {
        SequenceFileOutputFormat.setCompressOutput(conf, true);

        if (compressionType.equals("record")) {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
            conf.setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    conf.setInputFormat(TrecWebDocumentInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(TrecWebDocument.class);

    conf.setMapperClass(MyMapper.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaDocnoMapping.java

License:Apache License

@SuppressWarnings("static-access")
@Override//w  ww  .ja  v a  2  s.c o  m
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("tmp output directory")
            .create(OUTPUT_PATH_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_PATH_OPTION)
            || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_PATH_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    JobConf conf = new JobConf(getConf(), BuildWikipediaDocnoMapping.class);
    conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    conf.setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        conf.set("wiki.language", language);
    }
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);
    Counters c = job.getCounters();
    long cnt = keepAll ? c.getCounter(PageTypes.TOTAL) : c.getCounter(PageTypes.ARTICLE);

    WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(conf), outputPath + "/part-00000", (int) cnt,
            outputFile);

    return 0;
}

From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaForwardIndex.java

License:Apache License

@SuppressWarnings("static-access")
@Override//from w  w  w. j  a v  a 2  s.  co m
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("tmp output directory")
            .create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), BuildWikipediaForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");
    LOG.info(" - language: " + language);

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    fs.delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF("edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex");
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;

        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }

    reader.close();
    out.close();

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    return 0;
}

From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaLinkGraph.java

License:Apache License

private void task1(String inputPath, String outputPath, int partitions) throws IOException {
    LOG.info("Exracting edges...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), BuildWikipediaLinkGraph.class);
    conf.setJobName(String.format("BuildWikipediaLinkGraph:Edges[input: %s, output: %s, num_partitions: %d]",
            inputPath, outputPath, partitions));

    conf.setNumReduceTasks(partitions);// ww w . j  av  a 2 s . co  m

    SequenceFileInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(PairOfStringInt.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper1.class);
    conf.setReducerClass(MyReducer1.class);
    conf.setPartitionerClass(MyPartitioner1.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);
}