Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job.
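
For orientation, here is a minimal, self-contained sketch (not taken from the examples below; the class name, paths, and reducer count are illustrative) showing where setNumReduceTasks fits in a driver:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class ReducerCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "reducer count example");
        job.setJarByClass(ReducerCountExample.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // No mapper or reducer classes are set, so the identity Mapper and Reducer
        // are used; their output types match the TextInputFormat key/value types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Must be called before the job is submitted; afterwards it throws
        // IllegalStateException. A value of 0 makes the job map-only.
        job.setNumReduceTasks(4);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}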

Usage

From source file:com.yahoo.semsearch.fastlinking.utils.RunFELOntheGrid.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    //Job job = Job.getInstance( conf );
    job.setJarByClass(RunFELOntheGrid.class);
    // Process custom command-line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Specify various job-specific parameters
    job.setJobName("Entity Linker");
    job.setNumReduceTasks(100);
    job.setJarByClass(RunFELOntheGrid.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(FELMapper.class);
    job.setReducerClass(FELReducer.class);
    job.setCombinerClass(FELReducer.class);

    job.waitForCompletion(true);

    return 0;
}

From source file:com.yahoo.semsearch.fastlinking.w2v.EntityEmbeddings.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    //Job job = Job.getInstance( conf );
    job.setJarByClass(EntityEmbeddings.class);
    // Process custom command-line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Specify various job-specific parameters
    job.setJobName("Entity embeddings");
    job.setNumReduceTasks(1);
    job.setJarByClass(EntityEmbeddings.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(EntityEMapper.class);
    job.setReducerClass(EntityEReducer.class);
    job.setCombinerClass(EntityEReducer.class);

    job.waitForCompletion(true);

    return 0;
}

From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java

License:Apache License

/**
 * Parameters for bulk loader specified through the config file:
 *
 * - prefix for the row keys
 * - range start
 * - range end (inclusive)
 * - num splits (or number of partitions).
 * - user
 * - password
 * - table
 *
 * For the accepted default options
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    Util.printArgs("run", args, System.err);
    printKeyValues(conf, ARG_KEYS, System.err);

    if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) {
        System.err.println("Usage: " + this.getClass().getName()
                + "input_path [generic options] [input_paths...] ouptut_path");
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }

    // Time run
    long startTime = System.currentTimeMillis();
    String workdir;

    if (args.length == 1) {
        /* override workdir in the config if it is specified in the
         * command line
         */
        conf.set(ARG_KEY_OUTDIR, args[0]);
        workdir = args[0];
    }

    workdir = conf.get(ARG_KEY_OUTDIR);

    if (workdir == null) {
        System.err.println("No output directory specified");
        return 1;
    }

    /* Initialize job, check parameters and decide which mapper to use */
    Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator"));

    /* these settings are the same (i.e., fixed) independent of the
     * parameters */
    job.setJarByClass(this.getClass());
    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);

    /* these settings should depend on the type of output file */
    job.setOutputFormatClass(HFileOutputFormat.class);
    /* not sure the next two are needed */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    this.createInputFile(job, workdir);

    HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files"));

    /* depending on whether the keys need to be sorted and hashed, then
     * decide which mapper and reducer to use 
     */
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true);

    /* get splits file name: side-effect -> this may generate a splits file  */
    String splitsfile = this.getSplitsFile(job, workdir);

    if (sortKeys && hashKeys) { /* do a full map reduce job */
        job.setMapperClass(RowGeneratorMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(RangePartitioner.class);

        if (splitsfile == null) {
            /* Auto generate the splits file either from:
             * - the input key ranges
             * - from the current table splits
             */
            throw new InvalidInputException("No splits specified");
        }

        /* Set splits file */
        RangePartitioner.setSplitFile(job, splitsfile);

        /* Add reducer (based on mapper code) */
        job.setReducerClass(RowGeneratorReduce.class);

        /* the number of reducers is dependent on the number of
         * partitions
         */
        int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1);
        job.setNumReduceTasks(numReduce);
    } else { /* perform a map only job */
        job.setMapperClass(RowGeneratorMapOnly.class);
        /* map output key and value types are the same as
         * for the job
         */
        job.setMapOutputKeyClass(job.getOutputKeyClass());
        job.setMapOutputValueClass(job.getOutputValueClass());
        job.setNumReduceTasks(0);
    }

    job.waitForCompletion(true);

    //        JobClient.runJob(conf);
    SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z");
    SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS");
    ddf.setTimeZone(TimeZone.getTimeZone("UTC"));
    long endTime = System.currentTimeMillis();
    System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime);
    System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime);
    System.out
            .println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime));
    return 0;
}

From source file:com.yourcompany.hadoop.mapreduce.aggregate.UnionDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job();

    parseArguements(args, job);

    job.setJarByClass(UnionDriver.class);

    // Mapper Class
    job.setMapperClass(UnionMapper.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer Task
    job.setNumReduceTasks(1);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.yourcompany.hadoop.mapreduce.KoreanWordcountDriver.java

License:Apache License

private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-exactMatch".equals(args[i])) {
            job.getConfiguration().set("exactMatch", args[++i]);
        } else if ("-bigrammable".equals(args[i])) {
            job.getConfiguration().set("bigrammable", args[++i]);
        } else if ("-hasOrigin".equals(args[i])) {
            job.getConfiguration().set("hasOrigin", args[++i]);
        } else if ("-originCNoun".equals(args[i])) {
            job.getConfiguration().set("originCNoun", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-minSupport".equals(args[i])) {
            job.getConfiguration().set("minSupport", args[++i]);
        }
    }
}

From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job();
    parseArguements(args, job);

    job.setJarByClass(LexicalAnalyzerDriver.class);

    // Mapper & Reducer Class
    job.setMapperClass(LexicalAnalyzerMapper.class);

    // Mapper Output Key & Value Type after Hadoop 0.20
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java

License:Apache License

private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-indexmode".equals(args[i])) {
            job.getConfiguration().set("indexmode", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        }
    }
}

From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java

License:Apache License

/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table
 *            The table to get the region count for.
 * @param job
 *            The current job to adjust.
 * @throws IOException
 *             When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}

From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java

License:Apache License

/**
 * Sets the number of reduce tasks for the given job configuration to the
 * number of regions the given table has.
 *
 * @param table
 *            The table to get the region count for.
 * @param job
 *            The current job to adjust.
 * @throws IOException
 *             When retrieving the table details fails.
 */
public static void setNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
}
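
As a hedged illustration of how these two helpers might be invoked (the table name, job name, and wrapper class are placeholders, not from the original source), a driver would typically size the reduce phase once the output table is known:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class RegionAwareJobSetup {
    // Configure a job whose reducer count follows the output table's regions.
    public static Job configure(Configuration conf, String table) throws IOException {
        Job job = new Job(conf, "hbase bulk export");
        // One reduce task per region of the output table.
        CustomizedUtil.setNumReduceTasks(table, job);
        // Alternatively, only cap a reducer count that was already configured:
        // CustomizedUtil.limitNumReduceTasks(table, job);
        return job;
    }
}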

From source file:connected.components.HashToMin.java

License:Open Source License

protected Job jobConfig() throws IOException {
    Job job = new Job(new Configuration(), "iteration");
    job.setJarByClass(HashToMin.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    return job;
}