Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapred.JobConf constructor JobConf(Configuration conf, Class exampleClass).

Prototype

public JobConf(Configuration conf, Class exampleClass) 

Document

Construct a map/reduce job configuration: conf is a Configuration whose settings will be inherited, and exampleClass is a class whose containing jar is used as the job's jar.
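
As a quick orientation, here is a minimal, hypothetical driver that shows the constructor in context. MyJob, the job name, and the path arguments are placeholders, not taken from the examples below:

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MyJob extends Configured implements Tool {
    public int run(String[] args) throws Exception {
        // Inherit site/cluster settings from getConf(); MyJob.class tells
        // Hadoop which jar to ship (the jar that contains this class).
        JobConf conf = new JobConf(getConf(), MyJob.class);
        conf.setJobName("my-job");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        // With no mapper/reducer set, the old API defaults to
        // IdentityMapper and IdentityReducer.
        JobClient.runJob(conf);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MyJob(), args));
    }
}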

Usage

From source file:SBP.java

License:Apache License

protected JobConf configInitMessage() throws Exception {
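    // Settings are inherited from getConf(); SBP.class identifies the jar
    // containing the job classes.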
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Init_Belief");

    conf.setMapperClass(MapInitializeBelief.class);

    fs.delete(message_cur_path, true);

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, message_cur_path);

    conf.setNumReduceTasks(0);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    return conf;
}

From source file:SBP.java

License:Apache License

protected JobConf configUpdateMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Update_message");

    conf.setMapperClass(MapUpdateMessage.class);
    conf.setReducerClass(RedUpdateMessage.class);

    fs.delete(message_next_path, true);

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, message_next_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:SBP.java

License:Apache License

protected JobConf configSmoothMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("smooth_lambda", "" + lambda);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Smooth_message");

    fs.delete(message_smooth_path, true);

    conf.setMapperClass(MapSmoothMessage.class);
    conf.setReducerClass(RedSmoothMessage.class);

    FileInputFormat.setInputPaths(conf, message_cur_path, message_next_path);
    FileOutputFormat.setOutputPath(conf, message_smooth_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:SBP.java

License:Apache License

protected JobConf configCheckErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Check Err");

    fs.delete(check_error_path, true);

    conf.setMapperClass(MapCheckErr.class);
    conf.setReducerClass(RedCheckErr.class);

    FileInputFormat.setInputPaths(conf, message_cur_path, message_smooth_path);
    FileOutputFormat.setOutputPath(conf, check_error_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:SBP.java

License:Apache License

protected JobConf configSumErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Sum Err");

    fs.delete(sum_error_path, true);

    conf.setMapperClass(MapSumErr.class);
    conf.setReducerClass(RedSumErr.class);

    FileInputFormat.setInputPaths(conf, check_error_path);
    FileOutputFormat.setOutputPath(conf, sum_error_path);

    conf.setNumReduceTasks(1);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:SBP.java

License:Apache License

protected JobConf configComputeBelief() throws Exception {
    final JobConf conf = new JobConf(getConf(), SBP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Compute_Belief");

    conf.setMapperClass(MapComputeBelief.class);
    conf.setReducerClass(RedComputeBelief.class);

    fs.delete(output_path, true);

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:WikipediaForwardIndexBuilder.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - language: " + language);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;

        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }

    reader.close();
    out.close();

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    // Clean up.
    fs.delete(new Path(tmpPath), true);

    return 0;
}

From source file:PiEstimator.java

License:Apache License

/**
 * Parses arguments and then runs a map/reduce job.
 * Prints output to standard out.
 *
 * @return a non-zero value if there is an error; otherwise, 0.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: " + getClass().getName() + " <nMaps> <nSamples>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final int nMaps = Integer.parseInt(args[0]);
    final long nSamples = Long.parseLong(args[1]);

    System.out.println("Number of Maps  = " + nMaps);
    System.out.println("Samples per Map = " + nSamples);

    final JobConf jobConf = new JobConf(getConf(), getClass());
    System.out.println("Estimated value of Pi is " + estimate(nMaps, nSamples, jobConf));
    return 0;
}

From source file:DataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

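    // Expected args: inputDir outputDir inputFormat("text"|other) numReducers
    //   mapperClass reducerClass mapOutputValueClass outputFormat("text"|valueClass)
    //   [maxValuesPerGroup] [jobName]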
    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:NgramMatrixBuilder.java

License:Apache License

/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), NgramMatrixBuilder.class);
    conf.setJobName("ngrammatrixbuilder");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

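    // Parse -m <numMaps> and -r <numReduces>; any other argument is positional.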
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    TextInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}