Example usage for org.apache.hadoop.mapred Partitioner getPartition

List of usage examples for org.apache.hadoop.mapred Partitioner getPartition

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred Partitioner getPartition.

Prototype

int getPartition(K2 key, V2 value, int numPartitions);

Source Link

Document

Get the paritition number for a given key (hence record) given the total number of partitions i.e.

Usage

From source file:edu.umd.cloud9.pagerank.RunPageRankSchimmy.java

License:Apache License

private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws IOException {
    JobConf conf = new JobConf(RunPageRankBasic.class);

    String in = path + "/iter" + sFormat.format(i);
    String out = path + "/iter" + sFormat.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // we need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log)
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;/*www .  j  av a  2 s .  c o m*/
    }

    conf.setInt("NodeCount", n);

    Partitioner p = null;

    if (useRange) {
        p = new RangePartitioner<IntWritable, Writable>();
        p.configure(conf);
    } else {
        p = new HashPartitioner<WritableComparable, Writable>();
    }

    // this is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        if (f.getPath().getName().contains("_logs"))
            continue;

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), conf);

        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        sLogger.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + "\t");
    }

    sLogger.info(sb.toString().trim());

    sLogger.info("PageRankSchimmy: iteration " + j + ": Phase1");
    sLogger.info(" - input: " + in);
    sLogger.info(" - output: " + out);
    sLogger.info(" - nodeCnt: " + n);
    sLogger.info(" - useCombiner: " + useCombiner);
    sLogger.info(" - useInmapCombiner: " + useInmapCombiner);
    sLogger.info(" - numPartitions: " + numPartitions);
    sLogger.info(" - useRange: " + useRange);
    sLogger.info("computed number of partitions: " + numPartitions);

    int numMapTasks = numPartitions;
    int numReduceTasks = numPartitions;

    conf.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    FileInputFormat.setInputPaths(conf, new Path(in));
    FileOutputFormat.setOutputPath(conf, new Path(out));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(FloatWritable.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        conf.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        conf.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        conf.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        conf.setPartitionerClass(RangePartitioner.class);
    }

    conf.setReducerClass(ReduceClass.class);

    conf.setSpeculativeExecution(false);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    JobClient.runJob(conf);

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}

From source file:org.apache.hama.computemodel.mapreduce.ShuffleAndDistribute.java

License:Apache License

@Override
protected void compute(
        BSPPeer<NullWritable, NullWritable, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> peer)
        throws IOException {
    int peerId = peer.getPeerId();
    Configuration conf = peer.getConfiguration();

    this.memoryQueue = (PriorityQueue<WritableKeyValues<K2, V2>>) peer.getSavedObject(Mapper.MESSAGE_QUEUE);

    this.globalKeyDistribution = (long[][]) peer.getSavedObject(Mapper.KEY_DIST);

    WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable> message;
    while ((message = (WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable>) peer
            .getCurrentMessage()) != null) {
        int peerNo = message.getKey().getKey().get();
        int partition = message.getKey().getValue().get();
        globalKeyDistribution[peerNo][partition] += message.getValue().get();
    }/*from   w  w  w  .  j  a va  2s  . com*/

    int[] keyDistribution = new int[globalKeyDistribution[0].length];

    designateKeysToReducers(keyDistribution, globalKeyDistribution, conf);

    int myKeyCount = 0;
    for (int i = 0; i < globalKeyDistribution[0].length; ++i) {
        myKeyCount += globalKeyDistribution[peerId][i];
    }

    PriorityQueue<WritableKeyValues<K2, V2>> mergeQueue = new PriorityQueue<WritableKeyValues<K2, V2>>(
            myKeyCount);
    Partitioner<K2, V2> partitioner = (Partitioner<K2, V2>) ReflectionUtils
            .newInstance(conf.getClass(Mapper.PARTITIONER_CLASS, HashPartitioner.class), conf);

    Iterator<WritableKeyValues<K2, V2>> keyValIter = this.memoryQueue.iterator();
    String[] peerNames = peer.getAllPeerNames();
    while (keyValIter.hasNext()) {
        WritableKeyValues<K2, V2> record = keyValIter.next();
        int partition = partitioner.getPartition(record.getKey(), record.getValue(), peer.getNumPeers()); // should be num reducers
                                                                                                          // eventually
        int destPeerId = keyDistribution[partition];
        if (peerId != destPeerId) {
            peer.send(peerNames[destPeerId], record);
            keyValIter.remove();
        }
    }

}