Example usage for org.apache.mahout.common IntPairWritable getFirst

List of usage examples for org.apache.mahout.common IntPairWritable getFirst

Introduction

On this page you can find example usage of org.apache.mahout.common.IntPairWritable.getFirst().

Prototype

public int getFirst() 

Source Link

Usage

From source file: com.mozilla.grouperfish.mahout.clustering.display.lda.DisplayLDATopics.java

License: Apache License

/**
 * Reads (topic, feature) keyed scores from the sequence files under {@code stateDirPath} and
 * groups them into one priority queue per topic.
 *
 * <p>For every record whose topic id and feature id are both non-negative, {@code exp(score)}
 * is added to a running per-topic total and the feature string looked up in
 * {@code featureIndex} is handed to {@code enqueue} together with the raw score. Once reading
 * has finished, the first element of every queued pair is overwritten with
 * {@code exp(score) / total} for its topic.
 *
 * <p>An {@link IOException} raised while reading is logged and not rethrown; whatever was read
 * up to that point is still normalised and returned.
 *
 * @param stateDirPath    path of the directory handed to the sequence file reader
 * @param featureIndex    lookup from feature id to feature string; a missing id yields
 *                        {@code null}, which is passed on to {@code enqueue} unchanged
 * @param numWordsToPrint initial capacity of each per-topic queue, also forwarded to
 *                        {@code enqueue} (presumably the per-topic size limit; confirm
 *                        against {@code enqueue})
 * @return map from topic id to the queue of (normalised score, feature) pairs for that topic
 */
public static Map<Integer, PriorityQueue<Pair<Double, String>>> getTopWordsByTopics(String stateDirPath,
        Map<Integer, String> featureIndex, int numWordsToPrint) {
    Map<Integer, Double> topicTotals = new HashMap<Integer, Double>();
    Map<Integer, PriorityQueue<Pair<Double, String>>> topicQueues = new HashMap<Integer, PriorityQueue<Pair<Double, String>>>();

    SequenceFileDirectoryReader reader = null;
    try {
        IntPairWritable key = new IntPairWritable();
        DoubleWritable value = new DoubleWritable();
        reader = new SequenceFileDirectoryReader(new Path(stateDirPath));

        while (reader.next(key, value)) {
            int topic = key.getFirst();
            int featureId = key.getSecond();
            // Records with a negative topic or feature id are skipped entirely.
            if (featureId < 0 || topic < 0) {
                continue;
            }

            double score = value.get();

            // Accumulate exp(score) per topic; used as the normalisation denominator below.
            Double previousTotal = topicTotals.get(topic);
            double runningTotal = (previousTotal == null) ? 0.0 : previousTotal.doubleValue();
            topicTotals.put(topic, runningTotal + Math.exp(score));

            String feature = featureIndex.get(featureId);

            // Create and register the topic's queue on first sight; afterwards the same
            // instance is reused, so no further put is needed.
            PriorityQueue<Pair<Double, String>> topicQueue = topicQueues.get(topic);
            if (topicQueue == null) {
                topicQueue = new PriorityQueue<Pair<Double, String>>(numWordsToPrint);
                topicQueues.put(topic, topicQueue);
            }
            enqueue(topicQueue, feature, score, numWordsToPrint);
        }
    } catch (IOException e) {
        LOG.error("Error reading LDA state dir", e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    // Replace each raw score with exp(score) divided by its topic's accumulated total.
    for (Map.Entry<Integer, PriorityQueue<Pair<Double, String>>> entry : topicQueues.entrySet()) {
        // Every topic that owns a queue had its total written before the queue was created.
        double topicTotal = topicTotals.get(entry.getKey());
        for (Pair<Double, String> scored : entry.getValue()) {
            double rawScore = scored.getFirst();
            scored.setFirst(Math.exp(rawScore) / topicTotal);
        }
    }

    return topicQueues;
}