Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setPartitionerClass, collected from open source projects.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Document

Set the Partitioner for the job.
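
Before the project examples below, here is a minimal, self-contained sketch of defining a custom Partitioner and registering it with setPartitionerClass. The class name and key/value types are illustrative assumptions, not taken from any of the sources on this page. Note that setPartitionerClass throws IllegalStateException once the job has been submitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical partitioner: buckets Text keys by their first character.
public class FirstCharPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        if (key.getLength() == 0) {
            return 0;
        }
        // Mask the sign bit so the partition index is non-negative.
        return (key.charAt(0) & Integer.MAX_VALUE) % numPartitions;
    }
}

Registering it on a job:

Job job = Job.getInstance(new Configuration(), "partitioner-demo");
job.setPartitionerClass(FirstCharPartitioner.class);
job.setNumReduceTasks(4);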

Usage

From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java

public void kpi() throws Exception {
    final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());

    job.setJarByClass(KpiApp.class);

    // 1.1 specify the input path and input format
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 specify the mapper and its output types <k2, v2>
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(KpiWritable.class);

    // 1.3 specify the partitioner and the matching number of reduce tasks
    job.setPartitionerClass(KpiPartitioner.class);
    job.setNumReduceTasks(2);

    // 1.4 sorting and 1.5 combining are not configured; the defaults apply
    // 2.2 specify the reducer and its output types <k3, v3>
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(KpiWritable.class);

    // 2.3 specify the output path and output format
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job to the JobTracker and wait for completion
    job.waitForCompletion(true);
}
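
KpiPartitioner itself is not included in this excerpt. Since the job fixes the reduce task count at two, a two-way split along the following lines would be consistent with it; this is a hedged sketch assuming the Text keys hold phone numbers, not the original class.

// Hypothetical reconstruction; the real KpiPartitioner is not shown above.
public static class KpiPartitioner extends Partitioner<Text, KpiWritable> {
    @Override
    public int getPartition(Text key, KpiWritable value, int numPartitions) {
        // Assumed rule: 11-digit mobile numbers go to partition 0,
        // everything else to partition 1, matching setNumReduceTasks(2).
        return key.toString().length() == 11 ? 0 : 1;
    }
}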

From source file:com.binbo.wordcount.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class); // Set the combiner
    job.setPartitionerClass(WordPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
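
WordPartitioner is likewise not shown in this excerpt. A minimal sketch that behaves like Hadoop's default HashPartitioner (hash the key, mask the sign bit, take the remainder) would be:

// Hypothetical stand-in for the WordPartitioner referenced above.
public class WordPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}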

From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     ZPutSortReducer)</li>
 * </ul> 
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(ZHFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(ZPutSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // makeQualified returns a new qualified Path; keep the result.
    partitionsPath = partitionsPath.makeQualified(fs);
    URI cacheUri;
    try {
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
}
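
A driver using this method would set up the mapper and map output classes first, since configureIncrementalLoad picks the reducer based on the map output value class. The following is a hedged sketch; the driver class, mapper, table name, and paths are placeholders, not part of this source.

Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "bulk-load");
job.setJarByClass(MyDriver.class);               // placeholder driver class
job.setMapperClass(MyKeyValueMapper.class);      // placeholder mapper
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);      // must be KeyValue or Put

FileInputFormat.addInputPath(job, new Path("/input"));    // placeholder path
FileOutputFormat.setOutputPath(job, new Path("/hfiles")); // placeholder path
ZHFileOutputFormat.configureIncrementalLoad(job, new HTable(conf, "demo_table"));
job.waitForCompletion(true);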

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning
 * against <code>splitPoints</code>. Cleans up the partitions file after the
 * job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {

    // create the partitions file
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    // makeQualified returns the qualified Path; keep the result.
    partitionsPath = fs.makeQualified(partitionsPath);
    fs.deleteOnExit(partitionsPath);
    writePartitions(job.getConfiguration(), partitionsPath, splitPoints);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}
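
Callers supply the split points themselves; with TotalOrderPartitioner, n split points yield n + 1 partitions, so the reduce task count has to match. A hedged sketch of such a caller (the helper producing the split points is an assumption):

// Hypothetical caller of configurePartitioner.
List<ImmutableBytesWritable> splitPoints = computeSplitPoints(); // assumed helper
job.setNumReduceTasks(splitPoints.size() + 1); // n split points => n + 1 ranges
configurePartitioner(job, splitPoints);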

From source file:com.cloudera.crunch.GroupingOptions.java

License:Open Source License

public void configure(Job job) {
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
        LOG.info(String.format("Using %d reduce tasks", numReducers));
    }
}

From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }

    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    boolean useSimplePartitioner = getUseSimplePartitioner(job);

    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
        useSimplePartitioner = true;
    } else {
        Path inputDir = new Path(args[0]);
        TeraInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TeraInputFormat.class);
    }

    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java

License:Apache License

/**
 * Convenience method to configure the job for using the composite key.
 * @param job            the job using this serializer
 * @param groupKeyClass      the key type used for grouping
 * @param sortKeyClass      the key type used for sorting
 */
@SuppressWarnings("rawtypes")
public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass,
        Class<? extends WritableComparable> sortKeyClass) {

    // First, setup our classes...
    job.getConfiguration().set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName());
    job.getConfiguration().set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName());

    // Set this class as our map output key
    job.setMapOutputKeyClass(CompositeSortKey.class);

    // Setup the partitioner and comparators.
    job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class);
    job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class);
    job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class);

    // Now setup the serialization by registering with the framework.
    Collection<String> serializations = new ArrayList<>();
    serializations.add(CompositeSortKeySerialization.class.getName());
    serializations.addAll(job.getConfiguration().getStringCollection("io.serializations"));
    job.getConfiguration().setStrings("io.serializations",
            serializations.toArray(new String[serializations.size()]));

}
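
With that in place, a driver only needs one call to wire up the composite-key machinery; a hedged sketch with assumed key types and mapper:

Job job = Job.getInstance(conf, "secondary sort");
job.setMapperClass(MyCompositeKeyMapper.class); // assumed mapper emitting CompositeSortKey
// One call registers the serialization and sets the map output key class,
// the partitioner, and both comparators.
CompositeSortKeySerialization.configureMapOutputKey(job, Text.class, LongWritable.class);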

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.PartitionerAnnotationHandler.java

License:Apache License

@Override
public void process(Annotation annotation, Job job, Object target) {
    Partitioner partitioner = (Partitioner) annotation;
    if (partitioner != null && partitioner.value() != null && partitioner.value() != NULLPARTITIONER.class) {
        job.setPartitionerClass(partitioner.value());
    }
}

From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairs.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairs.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;

}

From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairsSecondarySort.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairsSecondarySort.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setSortComparatorClass(KeyComparator.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    // setOutputValueClass sets the reduce output value type; the map output
    // value type defaults to it unless setMapOutputValueClass is called.
    job.setOutputValueClass(Text.class);
    //job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}