List of usage examples for org.apache.mahout.common IntPairWritable getFirst
public int getFirst()
From source file: com.mozilla.grouperfish.mahout.clustering.display.lda.DisplayLDATopics.java
License: Apache License
public static Map<Integer, PriorityQueue<Pair<Double, String>>> getTopWordsByTopics(String stateDirPath, Map<Integer, String> featureIndex, int numWordsToPrint) { Map<Integer, Double> expSums = new HashMap<Integer, Double>(); Map<Integer, PriorityQueue<Pair<Double, String>>> queues = new HashMap<Integer, PriorityQueue<Pair<Double, String>>>(); SequenceFileDirectoryReader reader = null; try {// w ww.j a v a2 s. c o m IntPairWritable k = new IntPairWritable(); DoubleWritable v = new DoubleWritable(); reader = new SequenceFileDirectoryReader(new Path(stateDirPath)); while (reader.next(k, v)) { int topic = k.getFirst(); int featureId = k.getSecond(); if (featureId >= 0 && topic >= 0) { double score = v.get(); Double curSum = expSums.get(topic); if (curSum == null) { curSum = 0.0; } expSums.put(topic, curSum + Math.exp(score)); String feature = featureIndex.get(featureId); PriorityQueue<Pair<Double, String>> q = queues.get(topic); if (q == null) { q = new PriorityQueue<Pair<Double, String>>(numWordsToPrint); } enqueue(q, feature, score, numWordsToPrint); queues.put(topic, q); } } } catch (IOException e) { LOG.error("Error reading LDA state dir", e); } finally { if (reader != null) { reader.close(); } } for (Map.Entry<Integer, PriorityQueue<Pair<Double, String>>> entry : queues.entrySet()) { int topic = entry.getKey(); for (Pair<Double, String> p : entry.getValue()) { double score = p.getFirst(); p.setFirst(Math.exp(score) / expSums.get(topic)); } } return queues; }