List of usage examples for org.apache.hadoop.mapreduce.Job.setJobName
public void setJobName(String name) throws IllegalStateException
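For orientation, here is a minimal, hypothetical driver (the class name, job name, and argument paths are illustrative and not taken from the examples below) showing where setJobName fits: it is called after the Job is created and before submission, since the method throws IllegalStateException once the job is in a running state.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // setJobName must be called before submission; once the job is
        // running it throws IllegalStateException.
        job.setJobName("set-job-name-example");
        job.setJarByClass(SetJobNameExample.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Placeholder input/output paths passed on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}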
From source file:edu.udel.mxv.Mxv.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println(USAGE);
        System.exit(1);
    }

    int n = Integer.parseInt(args[0]);
    String input_matrix = args[1];
    String input_vector = args[2];
    String output = args[3];

    Configuration conf = getConf();
    conf.set("vector.path", input_vector);
    conf.setInt("vector.n", n);

    Job job = new Job(conf);
    job.setJobName("mxv");
    job.setJarByClass(getClass());

    // mapper
    job.setMapperClass(MxvMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // reducer
    job.setReducerClass(MxvRed.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    // job.setNumReduceTasks(num_red);

    FileInputFormat.addInputPath(job, new Path(input_matrix));
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:edu.umd.cloud9.collection.ExtractHTMLFieldCollection.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) @Override/*from ww w . j a v a 2 s . c o m*/ public int runTool() throws Exception { Configuration conf = getConf(); Job job = new Job(conf); String inputPath = conf.get("Cloud9.InputPath"); String inputFormat = conf.get("Cloud9.InputFormat"); String outputPath = conf.get("Cloud9.OutputPath"); String tag = conf.get("Cloud9.TargetTag"); job.setJobName("ExtractFieldCollection"); job.setJarByClass(ExtractHTMLFieldCollection.class); job.setMapperClass(MyMapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(200); job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(inputFormat)); recursivelyAddInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(TextDocument.class); LOG.info("ExtractFieldCollection - " + tag); LOG.info(" - Input path: " + inputPath); LOG.info(" - Input format: " + inputFormat); LOG.info(" - Output path: " + outputPath); LOG.info(" - Target tag: " + tag); job.waitForCompletion(true); return 0; }
From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaWeightedLinkGraph.java
License:Apache License
private String phase1(String inputPath, int reduceNo, String lang)
        throws IOException, InterruptedException, ClassNotFoundException {
    String output = "tmp/wiki-link/phase1";

    Job job = Job.getInstance(getConf());
    job.setJobName("Build Wikipedia Weighted Link Graph. Phase 1");
    job.setJarByClass(BuildWikipediaWeightedLinkGraph.class);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    job.setNumReduceTasks(reduceNo);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(output));

    if ("en".equals(lang)) {
        job.setInputFormatClass(WikipediaPageInputFormat.class);
    } else {
        throw new InterruptedException("Wikipedia dump with language " + lang + " is not supported ");
    }

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfStringInt.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfStringInt.class);

    job.setMapperClass(LinkEmitMapClass.class);
    job.setReducerClass(RedirectResolveReduceClass.class);

    job.waitForCompletion(true);

    return output;
}
From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaWeightedLinkGraph.java
License:Apache License
private String phase2(String inputPath, int reduceNo)
        throws IOException, InterruptedException, ClassNotFoundException {
    String output = "tmp/wiki-link/phase2";

    Job job = Job.getInstance(getConf());
    job.setJobName("Build Wikipedia Weighted Link Graph. Phase 2");
    job.setJarByClass(BuildWikipediaWeightedLinkGraph.class);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    job.setNumReduceTasks(reduceNo);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfStringInt.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfStringInt.class);

    job.setReducerClass(DestinationIdResolveReduceClass.class);

    job.waitForCompletion(true);

    return output;
}
From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaWeightedLinkGraph.java
License:Apache License
private String phase3(String inputPath, int reduceNo)
        throws IOException, InterruptedException, ClassNotFoundException {
    String output = "trace/phase3";

    Job job = Job.getInstance(getConf());
    job.setJobName("Build Wikipedia Weighted Link Graph. Phase 3");
    job.setJarByClass(BuildWikipediaWeightedLinkGraph.class);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    job.setNumReduceTasks(reduceNo);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfStringInt.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setReducerClass(SourceIdResolveReduceClass.class);

    job.waitForCompletion(true);

    return output;
}
From source file:edu.umd.cloud9.collection.wikipedia.CountWikipediaPages.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from ww w . j a va 2 s. c o m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder .withArgName("en|sv|nl|de|fr|ru|it|es|vi|pl|ja|pt|zh|uk|ca|fa|no|fi|id|ar|sr|ko|hi|zh_yue|cs|tr") .hasArg().withDescription("two-letter or six-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = "en"; // Assume 'en' by default. if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (!(language.length() == 2 || language.length() == 6)) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - language: " + language); Job job = Job.getInstance(getConf()); job.setJarByClass(CountWikipediaPages.class); job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]", INPUT_OPTION, inputPath, LANGUAGE_OPTION, language)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(inputPath)); if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapperClass(MyMapper.class); job.waitForCompletion(true); return 0; }
From source file:edu.umd.cloud9.collection.wikipedia.DumpWikipediaToPlainText.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from ww w . ja v a2s . co m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION)); options.addOption(OptionBuilder .withArgName("en|sv|nl|de|fr|ru|it|es|vi|pl|ja|pt|zh|uk|ca|fa|no|fi|id|ar|sr|ko|hi|zh_yue|cs|tr") .hasArg().withDescription("two-letter or six-letter language code").create(LANGUAGE_OPTION)); options.addOption(OptionBuilder.withArgName("TEXT|HTML|WIKI").hasArg() .withDescription("Output Content Type TEXT, HTML, WIKI").create(CONTENT_FORMAT_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = "en"; // Assume "en" by default. if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (!(language.length() == 2 || language.length() == 6)) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String contentFormat = "TEXT"; // Assume "TEXT" by default. if (cmdline.hasOption(CONTENT_FORMAT_OPTION)) { contentFormat = cmdline.getOptionValue(CONTENT_FORMAT_OPTION); if (!contentFormat.equals("TEXT") && !contentFormat.equals("HTML") && !contentFormat.equals("WIKI")) { System.err.println("Error: \"" + contentFormat + "\" unknown content type!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputPath = cmdline.getOptionValue(OUTPUT_OPTION); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - output path : " + outputPath); LOG.info(" - language : " + language); LOG.info(" - content_type : " + contentFormat); Configuration conf = getConf(); Job job = Job.getInstance(conf); job.setJarByClass(DumpWikipediaToPlainText.class); job.setJobName(String.format("DumpWikipediaToPlainText[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath, LANGUAGE_OPTION, language, CONTENT_FORMAT_OPTION, contentFormat)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); if (language != null) { job.getConfiguration().set("wiki.language", language); } if (contentFormat != null) { job.getConfiguration().set("wiki.content_format", contentFormat); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:edu.umd.cloud9.collection.wikipedia.ExtractWikipediaAnchorTextWithWindow.java
License:Apache License
private void task1(String inputPath, String outputPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("Extracting anchor text (phase 1)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(ExtractWikipediaAnchorTextWithWindow.class);
    job.setJobName(
            String.format("ExtractWikipediaAnchorText:phase1[input: %s, output: %s]", inputPath, outputPath));

    // 10 reducers is reasonable.
    job.setNumReduceTasks(10);

    // increase heap
    job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");
    job.getConfiguration().set("mapreduce.map.memory.mb", "6144");
    job.getConfiguration().set("mapreduce.reduce.memory.mb", "6144");
    job.getConfiguration().set("mapreduce.map.java.opts", "-Xmx6144m");
    job.getConfiguration().set("mapreduce.reduce.java.opts", "-Xmx6144m");
    job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(PairOfStrings.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PairOfIntString.class);

    job.setMapperClass(MyMapper1.class);
    job.setReducerClass(MyReducer1.class);
    job.setPartitionerClass(MyPartitioner1.class);

    // Delete the output directory if it exists already.
    FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true);

    job.waitForCompletion(true);
}
From source file:edu.umd.cloud9.collection.wikipedia.ExtractWikipediaAnchorTextWithWindow.java
License:Apache License
private void task2(String inputPath, String outputPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("Extracting anchor text (phase 2)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(ExtractWikipediaAnchorTextWithWindow.class);
    job.setJobName(
            String.format("ExtractWikipediaAnchorText:phase2[input: %s, output: %s]", inputPath, outputPath));

    // Gathers everything together for convenience; feasible for Wikipedia.
    job.setNumReduceTasks(1);

    // increase heap
    job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");
    job.getConfiguration().set("mapreduce.map.memory.mb", "6144");
    job.getConfiguration().set("mapreduce.reduce.memory.mb", "6144");
    job.getConfiguration().set("mapreduce.map.java.opts", "-Xmx6144m");
    job.getConfiguration().set("mapreduce.reduce.java.opts", "-Xmx6144m");
    job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper2.class);
    job.setReducerClass(MyReducer2.class);

    // Delete the output directory if it exists already.
    FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    // Clean up intermediate data.
    FileSystem.get(job.getConfiguration()).delete(new Path(inputPath), true);
}
From source file:edu.umd.cloud9.collection.wikipedia.RepackWikipedia.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from w ww .ja va 2 s . co m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location") .create(OUTPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file") .create(MAPPING_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("block|record|none").hasArg() .withDescription("compression type").create(COMPRESSION_TYPE_OPTION)); options.addOption(OptionBuilder .withArgName("en|sv|nl|de|fr|ru|it|es|vi|pl|ja|pt|zh|uk|ca|fa|no|fi|id|ar|sr|ko|hi|zh_yue|cs|tr") .hasArg().withDescription("two-letter or six-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputPath = cmdline.getOptionValue(OUTPUT_OPTION); String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION); String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION); if (!"block".equals(compressionType) && !"record".equals(compressionType) && !"none".equals(compressionType)) { System.err.println("Error: \"" + compressionType + "\" unknown compression type!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (!(language.length() == 2 || language.length() == 6)) { // Added length check for 6 to include languages like zh_yue System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } // this is the default block size int blocksize = 1000000; Job job = Job.getInstance(getConf()); job.setJarByClass(RepackWikipedia.class); job.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language)); job.getConfiguration().set(DOCNO_MAPPING_FIELD, mappingFile); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - docno mapping data file: " + mappingFile); LOG.info(" - compression type: " + compressionType); LOG.info(" - language: " + language); if ("block".equals(compressionType)) { LOG.info(" - block size: " + blocksize); } job.setNumReduceTasks(0); SequenceFileInputFormat.addInputPath(job, new Path(inputPath)); SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath)); if ("none".equals(compressionType)) { SequenceFileOutputFormat.setCompressOutput(job, false); } else { SequenceFileOutputFormat.setCompressOutput(job, true); if ("record".equals(compressionType)) { SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD); } else { SequenceFileOutputFormat.setOutputCompressionType(job, 
SequenceFile.CompressionType.BLOCK); job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize); } } if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(WikipediaPageFactory.getWikipediaPageClass(language)); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }