Example usage for org.apache.hadoop.mapred Partitioner getPartition

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred Partitioner getPartition.

Prototype

int getPartition(K2 key, V2 value, int numPartitions);

Source Link

Document

Get the partition number for a given key (hence record) given the total number of partitions, i.e., the number of reduce tasks for the job.

Usage

From source file:edu.umd.cloud9.pagerank.RunPageRankSchimmy.java

License:Apache License

/**
 * Configures and runs the "Phase1" MapReduce job of one PageRankSchimmy iteration.
 *
 * <p>Before submitting the job, every partition file of the input directory is opened and its
 * first record is fed to the partitioner, so that the mapping "partition number -> file" can be
 * handed to the job through the {@code PartitionMapping} configuration entry.
 *
 * @param path base directory; input is read from {@code path/iter<i>} and output is written to
 *        {@code path/iter<j>t} (side output under {@code path/iter<j>t-mass})
 * @param i iteration number used to build the input directory name
 * @param j iteration number used to build the output directory name and the job name
 * @param n value stored in the job configuration under {@code NodeCount}
 * @param useCombiner whether {@code CombineClass} is installed as the combiner
 * @param useInmapCombiner whether {@code MapWithInMapperCombiningClass} replaces {@code MapClass}
 * @param useRange whether {@code RangePartitioner} is used instead of {@code HashPartitioner}
 * @return the result of folding one float from every file under the {@code -mass} directory
 *         with {@code sumLogProbs}, starting from {@link Float#NEGATIVE_INFINITY}
 * @throws IOException if the file system or the job fails
 */
private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws IOException {
    // NOTE(review): the job is created from RunPageRankBasic.class although this code lives in
    // RunPageRankSchimmy -- confirm that this is intentional.
    JobConf conf = new JobConf(RunPageRankBasic.class);

    String in = path + "/iter" + sFormat.format(i);
    String out = path + "/iter" + sFormat.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // List the input directory once; the same listing drives both the partition count and the
    // partition-to-file mapping below.
    FileStatus[] status = fs.listStatus(new Path(in));

    // we need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log)
    int numPartitions = 0;
    for (FileStatus s : status) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    // Must be set before the partitioner is configured from this conf.
    conf.setInt("NodeCount", n);

    Partitioner p = null;

    if (useRange) {
        p = new RangePartitioner<IntWritable, Writable>();
        p.configure(conf);
    } else {
        p = new HashPartitioner<WritableComparable, Writable>();
    }

    // this is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        // Use the same filter as the count above, so that only real partition files are
        // opened (and not, e.g., _logs or other bookkeeping entries).
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        int np;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), conf);
        try {
            // NOTE(review): the result of next() is not checked; for an empty partition file
            // the key would keep its previous contents -- confirm inputs are never empty.
            reader.next(key, value);
            np = p.getPartition(key, value, numPartitions);
        } finally {
            // Close the reader even if reading the first record fails.
            reader.close();
        }

        sLogger.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + "\t");
    }

    String partitionMapping = sb.toString().trim();
    sLogger.info(partitionMapping);

    sLogger.info("PageRankSchimmy: iteration " + j + ": Phase1");
    sLogger.info(" - input: " + in);
    sLogger.info(" - output: " + out);
    sLogger.info(" - nodeCnt: " + n);
    sLogger.info(" - useCombiner: " + useCombiner);
    sLogger.info(" - useInmapCombiner: " + useInmapCombiner);
    sLogger.info(" - numPartitions: " + numPartitions);
    sLogger.info(" - useRange: " + useRange);
    sLogger.info("computed number of partitions: " + numPartitions);

    // One map task and one reduce task per input partition.
    int numMapTasks = numPartitions;
    int numReduceTasks = numPartitions;

    conf.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", partitionMapping);

    FileInputFormat.setInputPaths(conf, new Path(in));
    FileOutputFormat.setOutputPath(conf, new Path(out));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(FloatWritable.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        conf.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        conf.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        conf.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        conf.setPartitionerClass(RangePartitioner.class);
    }

    conf.setReducerClass(ReduceClass.class);

    conf.setSpeculativeExecution(false);

    // Remove output of a previous run so the job can write its directories afresh.
    fs.delete(new Path(out), true);
    fs.delete(new Path(outm), true);

    JobClient.runJob(conf);

    // Fold the float written to every file of the side-output directory into one value.
    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        try {
            mass = sumLogProbs(mass, fin.readFloat());
        } finally {
            // Close the stream even if the float cannot be read.
            fin.close();
        }
    }

    return mass;
}

From source file:org.apache.hama.computemodel.mapreduce.ShuffleAndDistribute.java

License:Apache License

/**
 * Merges the per-peer key-distribution messages into the global distribution, lets
 * {@code designateKeysToReducers} assign a destination peer to every partition, and then sends
 * each locally queued record whose partition belongs to another peer to that peer, removing it
 * from the local memory queue.
 *
 * @param peer the BSP peer providing the saved objects, incoming messages, configuration and
 *        the send channel
 * @throws IOException if receiving or sending a message fails
 */
@Override
protected void compute(
        BSPPeer<NullWritable, NullWritable, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> peer)
        throws IOException {
    int peerId = peer.getPeerId();
    Configuration conf = peer.getConfiguration();

    // State stored on the peer under the Mapper.MESSAGE_QUEUE / Mapper.KEY_DIST keys.
    this.memoryQueue = (PriorityQueue<WritableKeyValues<K2, V2>>) peer.getSavedObject(Mapper.MESSAGE_QUEUE);

    this.globalKeyDistribution = (long[][]) peer.getSavedObject(Mapper.KEY_DIST);

    // Each message carries ((peerNo, partition), count); add the count to the global table.
    WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable> message;
    while ((message = (WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable>) peer
            .getCurrentMessage()) != null) {
        int peerNo = message.getKey().getKey().get();
        int partition = message.getKey().getValue().get();
        globalKeyDistribution[peerNo][partition] += message.getValue().get();
    }

    // keyDistribution[partition] is filled in with the id of the peer that owns the partition.
    int[] keyDistribution = new int[globalKeyDistribution[0].length];

    designateKeysToReducers(keyDistribution, globalKeyDistribution, conf);

    Partitioner<K2, V2> partitioner = (Partitioner<K2, V2>) ReflectionUtils
            .newInstance(conf.getClass(Mapper.PARTITIONER_CLASS, HashPartitioner.class), conf);

    Iterator<WritableKeyValues<K2, V2>> keyValIter = this.memoryQueue.iterator();
    String[] peerNames = peer.getAllPeerNames();
    while (keyValIter.hasNext()) {
        WritableKeyValues<K2, V2> record = keyValIter.next();
        int partition = partitioner.getPartition(record.getKey(), record.getValue(), peer.getNumPeers()); // should be num reducers
                                                                                                          // eventually
        int destPeerId = keyDistribution[partition];
        if (peerId != destPeerId) {
            // The record belongs to another peer: ship it and drop the local copy.
            peer.send(peerNames[destPeerId], record);
            keyValIter.remove();
        }
    }

}