Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job.
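
Before the examples, here is a minimal self-contained driver sketch showing where setNumReduceTasks fits in job setup. It is an illustration only: MyDriver and its line-counting mapper/reducer are placeholders invented for this page, not taken from the examples below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {

    // Trivial mapper: emits each input line with a count of 1.
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, ONE);
        }
    }

    // Sums the counts emitted for each key.
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setNumReduceTasks demo");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Request exactly four reduce tasks; passing 0 would make this a map-only job.
        // Throws IllegalStateException if called after the job has been submitted.
        job.setNumReduceTasks(4);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}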

Usage

From source file: com.rockstor.compact.RecoveryTool.java

License: Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(RecoveryTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(RecoveryMapper.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK!");
    return job;
}
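
Because the reduce count is set to 0, this is a map-only job: the shuffle and reduce phases are skipped entirely, and each mapper's output is handed straight to the configured OutputFormat (here NullOutputFormat, which discards it).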

From source file: com.sa.npopa.samples.hbase.FindBadMOBReferences.java

License: Apache License

public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(FindBadMOBReferences.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    scan.addFamily(Bytes.toBytes("J"));
    //scan.setRowPrefixFilter(Bytes.toBytes("a00"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class, Text.class,
            Text.class, job);

    //job.setNumReduceTasks(0);
    job.setReducerClass(FindBadMOBReferencesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));
    return job;
}
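
In contrast to the previous example, this job requests exactly one reducer, so every mapper's output passes through a single FindBadMOBReferencesReducer instance and the job writes a single part file under /tmp/out.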

From source file: com.sa.npopa.samples.hbase.myMR.java

License: Apache License

public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(myMR.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);

    //scan.setFilter(new FirstKeyOnlyFilter()); //need to find another filter like key only.
    scan.setFilter(new KeyOnlyFilter());

    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class,
            Result.class, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file: com.sa.npopa.samples.hbase.RowCounter.java

License: Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String startKey = null;
    String endKey = null;
    long startTime = 0;
    long endTime = 0;

    StringBuilder sb = new StringBuilder();

    final String rangeSwitch = "--range=";
    final String startTimeArgKey = "--starttime=";
    final String endTimeArgKey = "--endtime=";
    final String expectedCountArg = "--expected-count=";

    // First argument is table name, starting from second
    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith(rangeSwitch)) {
            String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
            if (startEnd.length != 2 || startEnd[1].contains(",")) {
                printUsage("Please specify range in such format as \"--range=a,b\" "
                        + "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
                return null;
            }
            startKey = startEnd[0];
            endKey = startEnd[1];
            continue;
        }
        if (args[i].startsWith(startTimeArgKey)) {
            startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(endTimeArgKey)) {
            endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(expectedCountArg)) {
            conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length())));
            continue;
        }
        // if no switch, assume column names
        sb.append(args[i]);
        sb.append(" ");
    }
    if (endTime < startTime) {
        printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
        return null;
    }

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(RowCounter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startKey != null && !startKey.equals("")) {
        scan.setStartRow(Bytes.toBytes(startKey));
    }
    if (endKey != null && !endKey.equals("")) {
        scan.setStopRow(Bytes.toBytes(endKey));
    }
    if (sb.length() > 0) {
        for (String columnName : sb.toString().trim().split(" ")) {
            String family = StringUtils.substringBefore(columnName, ":");
            String qualifier = StringUtils.substringAfter(columnName, ":");

            if (StringUtils.isBlank(qualifier)) {
                scan.addFamily(Bytes.toBytes(family));
            } else {
                scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
            }
        }
    }
    scan.setFilter(new FirstKeyOnlyFilter());
    scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class,
            Result.class, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file: com.sanjay.mapreduce.SiCombiner.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(5);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
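
Five reduce tasks are requested here, so the word counts end up split across five part files in the output directory, with the custom WordPartitioner deciding which reducer each key is routed to. Unlike the number of map tasks, which is derived from the input splits, the reduce task count set here is honored exactly.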

From source file: com.savy3.nonequijoin.MapOutputSampler.java

License: Apache License

/**
 * Driver for InputSampler from the command line. Configures a JobConf
 * instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                System.out.println("Random sampling");
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    System.out.println("before paths");
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    MapOutputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
}
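
In this driver the -r flag feeds directly into setNumReduceTasks, and the getNumReduceTasks() check rejects jobs that were not given more than one reducer (the default is 1): a partition file written for TotalOrderPartitioner is only meaningful when there are at least two partitions to split keys across.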

From source file: com.sematext.hbase.hut.RollbackUpdatesMrJob.java

License: Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];

    conf.set("mapred.map.tasks.speculative.execution", "false");

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(RollbackUpdatesMapper.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    long startTime = args.length > 1 ? Long.parseLong(args[1]) : 0L;
    long endTime = args.length > 2 ? Long.parseLong(args[2]) : Long.MAX_VALUE;

    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    //       not be good way to do if tss are artificial in HutPuts though
    //    s.setTimeRange(startTime, endTime);
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR,
            String.valueOf(startTime));
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR,
            String.valueOf(endTime));

    s.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));

    s.setCacheBlocks(false);
    // TODO: allow user change using job params
    s.setCaching(512);
    s.setCacheBlocks(false);

    LOG.info("Using scan: " + s.toString());

    // TODO: allow better limiting of data to be fetched
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }

    LOG.info("starttime (inclusive): " + startTime + " (" + new Date(startTime) + ")"
            + ", endtime (inclusive): " + endTime + " (" + new Date(endTime) + ")");

    TableMapReduceUtil.initTableMapperJob(tableName, s, RollbackUpdatesMapper.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    return job;
}
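
Note the combination at the end: initTableReducerJob with a null reducer class configures TableOutputFormat for writing back to HBase, and setNumReduceTasks(0) then removes the reduce phase, so the mutations emitted by RollbackUpdatesMapper go directly to the table.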

From source file: com.sematext.hbase.hut.UpdatesProcessingMrJob.java

License: Apache License

/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name.
 * @param scan  The scan with the columns to scan.
 * @param up update processor implementation
 * @param job  The job configuration.
 * @throws java.io.IOException When setting up the job fails.
 */
@SuppressWarnings("unchecked")
public static void initJob(String table, Scan scan, UpdateProcessor up, Job job) throws IOException {
    TableMapReduceUtil.initTableMapperJob(table, scan, UpdatesProcessingMapper.class, null, null, job);
    job.setJarByClass(UpdatesProcessingMrJob.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    job.getConfiguration().set(UpdatesProcessingMapper.HTABLE_NAME_ATTR, table);
    job.getConfiguration().set(UpdatesProcessingMapper.HUT_PROCESSOR_CLASS_ATTR, up.getClass().getName());
    job.getConfiguration().set(UpdatesProcessingMapper.HUT_PROCESSOR_DETAILS_ATTR,
            convertUpdateProcessorToString(up));

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false"); // TODO: explain
}
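
For reference, a hypothetical caller of initJob might look like the sketch below; the table name, the Scan setup, and the MyUpdateProcessor class are assumptions for illustration, not part of the source above.

// Hypothetical usage sketch; "my_table" and MyUpdateProcessor are placeholders.
public static boolean runUpdatesProcessing(Configuration conf) throws Exception {
    Job job = new Job(conf, "hut-updates-processing");
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    UpdatesProcessingMrJob.initJob("my_table", scan, new MyUpdateProcessor(), job);
    return job.waitForCompletion(true);
}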

From source file: com.sematext.hbase.wd.RowKeyDistributorTestBase.java

License: Apache License

private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}

From source file: com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java

License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir,
        Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        fs.delete(tmpDir, true);
        //      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
        //          + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    //  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                LongWritable.class, CompressionType.NONE);
        try {
            writer.append(offset, size);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.submit();
    //      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
    //      System.out.println("Job Finished in " + duration + " seconds");
    return job.getJobID();

    //    } finally {
    //      fs.delete(tmpDir, true);
    //    }
}
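
A single reducer is the right choice for this job: the final Pi estimate has to aggregate the hit counts from every map task, so all map output must be funneled through one QmcReducer instance.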