Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

List of usage examples for org.apache.hadoop.mapreduce.Job.setPartitionerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:clustering.similarity.PreDriver.java

License:Apache License

/**
 * Configures and runs the "pre job" of the inverted similarity pipeline.
 *
 * <p>Expected arguments:
 * <ol>
 *   <li>{@code args[0]} - input directory (read with {@code KeyValueTextInputFormat})</li>
 *   <li>{@code args[1]} - output directory</li>
 *   <li>{@code args[2]} - optional; {@code "0"} keeps the default output format, anything else
 *       (or absent) writes block-compressed gzip sequence files</li>
 *   <li>{@code args[3]} - optional number of reducers, stored as {@code reducer.num} (default 29)</li>
 *   <li>{@code args[4]} - optional value stored as {@code deci.number} (default 3)</li>
 * </ol>
 *
 * @param args command line arguments as described above
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf(
                "usage: %s inverted_index_result_dir output_dir"
                        + " [compress_or_not] [reducer_number] [deci_number]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = MapReduceUtils.initConf(getConf());
    conf.set("mapreduce.reduce.speculative", "false");

    // TODO: 17-4-24 calculate split number from reducer number
    conf.setInt("split.num", 8);

    // Optional tuning arguments fall back to the built-in defaults.
    conf.setInt("reducer.num", args.length > 3 ? Integer.parseInt(args[3]) : 29);
    conf.setInt("deci.number", args.length > 4 ? Integer.parseInt(args[4]) : 3);

    Job job = Job.getInstance(conf, "pre job");
    job.setJarByClass(PreDriver.class);

    // Map side
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(PrePartitioner.class);

    // Reduce side
    job.setNumReduceTasks(conf.getInt("reducer.num", 29));
    job.setReducerClass(PreReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Compression is on by default; only an explicit "0" switches it off.
    boolean uncompressed = args.length > 2 && args[2].equals("0");
    if (!uncompressed) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    long startMillis = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
    System.out.println("inverted similarity pre job finished in: " + elapsedSeconds + " seconds");

    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:co.cask.cdap.internal.app.runtime.batch.PartitionerWrapper.java

License:Apache License

/**
 * Wraps the partitioner defined in the job with this {@link PartitionerWrapper} if it is defined.
 * @param job The MapReduce job/*from www  .j av a 2 s.co  m*/
 */
public static void wrap(Job job) {
    if (WrapperUtil.setIfDefined(job, MRJobConfig.PARTITIONER_CLASS_ATTR, ATTR_CLASS)) {
        job.setPartitionerClass(PartitionerWrapper.class);
    }
}

From source file:co.nubetech.hiho.dedup.DedupJob.java

License:Apache License

/**
 * Configures and runs the dedup MapReduce job, then records its counters.
 *
 * <p>The job is configured from the fields filled in by {@code populateConfiguration(args)}.
 * When {@code dedupBy} is {@code "key"} the key-based mapper/reducer pair is used, when it is
 * {@code "value"} the value-based pair is used. Map output keys are always {@code HihoTuple}
 * and are partitioned with {@code HihoHashPartitioner}.
 *
 * @param args command line arguments handed to {@code populateConfiguration}
 * @return 0 if the job finished successfully, 1 if it failed or could not be run
 * @throws Exception if a mandatory configuration value is missing or a configured class
 *                   cannot be loaded
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        logger.error("Mandatory configuration check failed", e1);
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Stays false unless waitForCompletion reports success, so any failure before or
    // during the job is reflected in the exit code instead of being reported as 0.
    boolean succeeded = false;
    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        succeeded = job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
        }
        logger.error("Dedup job did not run to completion", e);
    }
    return succeeded ? 0 : 1;
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     SingleColumnReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function; for any other map output value class only a warning is logged and
 * no reducer is set.
 *
 * @param job   the job to configure
 * @param table the target table whose region start keys define the reduce partitions
 * @throws IOException if the total order partitioner class cannot be found or the
 *                     partitions file URI cannot be built
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    // Partition map output with the total order partitioner.
    job.setPartitionerClass(topClass);
    // Set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    // Set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // Write the job output as HFiles
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // Collect the start key of every region of the table.
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");

    // One reduce task per region.
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // makeQualified returns a new Path rather than modifying its argument, so the
    // result has to be kept; the original call discarded it and had no effect.
    partitionsPath = fs.makeQualified(partitionsPath);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP.  Its cheating.
        // We are assume the define in the hbase bundled TOP is as it is in
        // hadoop (whether 0.20 or 0.22, etc.)
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

/**
 * API for Java clients; visible for testing; may become a public API eventually.
 *
 * <p>Runs a MapReduce job that scans the HBase input table, maps cells to Solr documents,
 * partitions them with {@code SolrCloudPartitioner} and writes one output directory per
 * reducer. The reducer output directories are then renamed to a normalized
 * {@code <prefix>-NNNNN} form, the whole reducer directory is published as the results
 * directory, and, if {@code options.goLive} is set, the go-live step is run on it.
 *
 * @param options the parsed tool options
 * @return 0 on success, -1 if the output directory cannot be cleared, the job fails, a
 *         rename fails or the go-live step fails
 * @throws IllegalStateException if running on MR1 with the local job runner
 * @throws Exception if job setup or submission fails
 */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    // Layer the HBase resources on top of the tool configuration. Starting from a
    // fresh HBaseConfiguration.create() would drop everything set on getConf() above.
    Configuration config = HBaseConfiguration.create(getConf());
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure 
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    // One reducer per shard: this keeps the reducer count in line with the shard count
    // handed to SolrCloudPartitioner below (it was hard-coded to 1 before).
    int reducers = options.shards;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1;// job failed
    }

    // -------------------------------------------------------------------------------------------------------------------------------------

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            // Drop the two-character "-r" / "-m" task type marker that follows the prefix.
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}

From source file:com.asakusafw.runtime.stage.AbstractStageClient.java

License:Apache License

/**
 * Applies this stage's shuffle settings to the job.
 *
 * <p>When no reducer class is available the job is made map-only (zero reduce tasks) and
 * nothing else is configured. Otherwise the reducer and the map output key/value classes
 * are set (missing ones fall back to {@code NullWritable}), followed by the combiner,
 * partitioner, grouping comparator and sort comparator where they are available.
 *
 * @param job       the job to configure
 * @param variables the variable table (not read by this method)
 */
@SuppressWarnings("rawtypes")
private void configureShuffle(Job job, VariableTable variables) {
    Class<? extends Reducer> reducerClass = getReducerClassOrNull();
    if (reducerClass == null) {
        // No reducer: run as a map-only job and skip all shuffle settings.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reducer: N/A"); //$NON-NLS-1$
        }
        job.setNumReduceTasks(0);
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Reducer: {0}", reducerClass.getName())); //$NON-NLS-1$
    }
    job.setReducerClass(reducerClass);

    Class<? extends Writable> shuffleKey = or(getShuffleKeyClassOrNull(), NullWritable.class);
    Class<? extends Writable> shuffleValue = or(getShuffleValueClassOrNull(), NullWritable.class);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$
                shuffleKey.getName(), shuffleValue.getName()));
    }
    job.setMapOutputKeyClass(shuffleKey);
    job.setMapOutputValueClass(shuffleValue);

    Class<? extends Reducer> combinerClass = getCombinerClassOrNull();
    if (combinerClass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Combiner: N/A"); //$NON-NLS-1$
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Combiner: {0}", combinerClass.getName())); //$NON-NLS-1$
        }
        job.setCombinerClass(combinerClass);
    }

    Class<? extends Partitioner> partitionerClass = getPartitionerClassOrNull();
    if (partitionerClass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Partitioner: {0}", partitionerClass.getName())); //$NON-NLS-1$
        }
        job.setPartitionerClass(partitionerClass);
    }

    Class<? extends RawComparator> groupingClass = getGroupingComparatorClassOrNull();
    if (groupingClass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingClass.getName())); //$NON-NLS-1$
        }
        job.setGroupingComparatorClass(groupingClass);
    }

    Class<? extends RawComparator> sortClass = getSortComparatorClassOrNull();
    if (sortClass == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("SortComparator: {0}", sortClass.getName())); //$NON-NLS-1$
        }
        job.setSortComparatorClass(sortClass);
    }
}

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

/**
 * Applies the patch to the head contents with a merge-join MapReduce job.
 *
 * <p>Head and patch contents are read as two stage inputs with their own mappers, shuffled
 * on {@code PatchApplyKey} (with its partitioner, sort comparator and grouping comparator),
 * reduced by {@code MergeJoinReducer} and written to the next directory. The meta
 * information is stored via {@code putMeta()} once the job has succeeded.
 *
 * @throws IOException if the job fails or a required class cannot be found
 * @throws InterruptedException if interrupted while waiting for the job
 */
private void updateMerge() throws IOException, InterruptedException {
    Job job = newJob();

    // Inputs: head contents first, then the patch contents.
    StageInput headInput = new StageInput(storage.getHeadContents("*").toString(),
            TemporaryInputFormat.class, MergeJoinBaseMapper.class);
    StageInput patchInput = new StageInput(storage.getPatchContents("*").toString(),
            TemporaryInputFormat.class, MergeJoinPatchMapper.class);
    List<StageInput> stageInputs = new ArrayList<>();
    stageInputs.add(headInput);
    stageInputs.add(patchInput);
    StageInputDriver.set(job, stageInputs);

    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(PatchApplyKey.class);
    job.setMapOutputValueClass(modelClass);

    // combiner may have no effect in normal cases
    job.setReducerClass(MergeJoinReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    job.setPartitionerClass(PatchApplyKey.Partitioner.class);
    job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
    job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    boolean succeed;
    try {
        succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    if (!succeed) {
        throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}",
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    }
    putMeta();
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilder.java

License:Apache License

/**
 * Creates a {@link Job} based on how {@code this} {@link KafkaJobBuilder} has been configured. There are no
 * side-effects on {@code this} instance when you call this method, so you can call it multiple times.
 * /*from w  ww  . ja v a  2s  . co m*/
 * @param conf
 *            the job conf.
 * @return a fully configured {@link Job}.
 * @throws Exception error
 * @throws IllegalArgumentException
 *             if any required parameters are not set.
 */
public Job configureJob(final Configuration conf) throws Exception {
    validateSettings();
    final Job job = Job.getInstance(conf, getDefaultedJobName());

    // set queue inputs
    if (getQueueMappers().size() == 1) {
        job.setInputFormatClass(KafkaInputFormat.class);
        final TopicConf topicConf = Iterables.getOnlyElement(getQueueMappers());
        KafkaInputFormat.setTopic(job, topicConf.getTopic());
        KafkaInputFormat.setConsumerGroup(job, topicConf.getConsumerGroup());
        job.setMapperClass(topicConf.getMapper());
    } else {
        job.setInputFormatClass(MultipleKafkaInputFormat.class);
        for (final TopicConf topicConf : getQueueMappers()) {
            MultipleKafkaInputFormat.addTopic(job, topicConf.getTopic(), topicConf.getConsumerGroup(),
                    topicConf.getMapper());
        }
    }

    if (getMapOutputKeyClass() != null) {
        job.setMapOutputKeyClass(getMapOutputKeyClass());
    }

    if (getMapOutputValueClass() != null) {
        job.setMapOutputValueClass(getMapOutputValueClass());
    }

    if (getReducerClass() == null) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(getReducerClass());
        job.setNumReduceTasks(getNumReduceTasks());
    }

    if (getPartitionerClass() != null) {
        job.setPartitionerClass(getPartitionerClass());
    }

    // set output
    job.setOutputFormatClass(getOutputFormatClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());
    if (getOutputFormat() == SupportedOutputFormat.TEXT_FILE) {
        TextOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    } else if (getOutputFormat() == SupportedOutputFormat.SEQUENCE_FILE) {
        SequenceFileOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    }

    if (usingS3()) {
        job.getConfiguration().set("fs.s3n.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3n.awsSecretAccessKey", getS3SecretyKey());
        job.getConfiguration().set("fs.s3.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3.awsSecretAccessKey", getS3SecretyKey());
    }

    if (isLazyOutputFormat()) {
        LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
    }

    // setup kafka input format specifics
    KafkaInputFormat.setZkConnect(job, getZkConnect());
    KafkaInputFormat.setKafkaFetchSizeBytes(job, getKafkaFetchSizeBytes());

    job.setSpeculativeExecution(false);
    job.setJarByClass(getClass());

    // memory settings for mappers
    if (!Strings.isNullOrEmpty(getTaskMemorySettings())) {
        job.getConfiguration().set("mapred.child.java.opts", getTaskMemorySettings());
    }

    return job;
}

From source file:com.benchmark.mapred.SecondarySort.java

License:Apache License

/**
 * Entry point of the secondary sort example job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments are expected: the
 * input path and the output path. Map output ({@code IntPair}, {@code IntWritable}) is
 * partitioned and grouped by the first int of the pair; the reduce output is
 * ({@code Text}, {@code IntWritable}). The JVM exits with 0 if the job succeeds, 1 if it
 * fails and 2 on a usage error.
 *
 * @param args command line arguments, including optional generic Hadoop options
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        // The program name was misspelled as "secondarysrot" in this message.
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java

/**
 * Runs the grouping example job from {@code INPUT_PATH} to {@code OUT_PATH}.
 *
 * <p>An existing output directory is deleted recursively first. The job reads text input,
 * maps it to ({@code NewK2}, {@code LongWritable}) pairs, uses a single reducer with
 * {@code MyGroupingComparator} for grouping and writes ({@code LongWritable},
 * {@code LongWritable}) pairs as text. The result of the job is not checked.
 *
 * @throws Exception if the file system cannot be accessed or the job cannot be run
 */
public void group() throws Exception {
    final Configuration configuration = new Configuration();
    final Path outputDir = new Path(OUT_PATH);

    // Remove output left over from a previous run.
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration);
    if (fileSystem.exists(outputDir)) {
        fileSystem.delete(outputDir, true);
    }

    final Job job = new Job(configuration, GroupApp.class.getSimpleName());

    // 1.1 input path and input format
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 mapper and its <k2,v2> output types
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(NewK2.class);
    job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning: hash partitioner with a single reduce task
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);

    // 1.4 grouping comparator (original comment unreadable; marked TODO)
    job.setGroupingComparatorClass(MyGroupingComparator.class);
    // 1.5 TODO (original comment unreadable; nothing is configured for this step)

    // 2.2 reducer and its <k3,v3> output types
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 output path and output format
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Submit the job and wait for it to finish.
    job.waitForCompletion(true);
}