List of usage examples for the org.apache.mahout.common.Pair.of method
public static <A, B> Pair<A, B> of(A a, B b)
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Builds a {@code numTopics x numTerms} dense matrix and a vector holding one value per row.
 *
 * <p>When {@code random} is non-null every cell is filled with {@code random.nextDouble()}
 * (row by row, term by term) and the per-row value is that row's {@code norm(1)}.
 * When {@code random} is null the matrix cells are never written and every per-row
 * value is set to {@code 1.0}.
 *
 * @param numTopics number of rows of the matrix and size of the returned vector
 * @param numTerms  number of columns of the matrix
 * @param random    source of cell values, or {@code null} to skip filling the matrix
 * @return the matrix paired with the per-row vector
 */
private static Pair<Matrix, Vector> randomMatrix(int numTopics, int numTerms, Random random) {
    final boolean randomize = random != null;
    Matrix counts = new DenseMatrix(numTopics, numTerms);
    Vector rowTotals = new DenseVector(numTopics);

    for (int topic = 0; topic < numTopics; topic++) {
        if (!randomize) {
            // No random source: leave the row untouched and use a unit total.
            rowTotals.set(topic, 1.0);
            continue;
        }
        // Fill the row first, then record its 1-norm.
        Vector row = counts.viewRow(topic);
        for (int col = 0; col < numTerms; col++) {
            row.set(col, random.nextDouble());
        }
        rowTotals.set(topic, counts.viewRow(topic).norm(1));
    }
    return Pair.of(counts, rowTotals);
}
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException { int numTopics = -1; int numTerms = -1; List<Pair<Integer, Vector>> rows = Lists.newArrayList(); for (Path modelPath : modelPaths) { for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf)) {/*from www .j a v a 2 s . co m*/ rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext if (numTerms < 0) { numTerms = row.getSecond().get().size(); } } } if (rows.isEmpty()) { throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it"); } numTopics++; Matrix model = new DenseMatrix(numTopics, numTerms); Vector topicSums = new DenseVector(numTopics); for (Pair<Integer, Vector> pair : rows) { model.viewRow(pair.getFirst()).assign(pair.getSecond()); topicSums.set(pair.getFirst(), pair.getSecond().norm(1)); } return Pair.of(model, topicSums); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Renders the largest non-zero entries of a vector as a {@code {label:value,...}} string.
 *
 * <p>Entries are ordered by value, largest first, and at most 25 are printed. The label
 * of an entry is {@code dictionary[index]} when a dictionary is supplied, otherwise the
 * decimal element index. A vector with no non-zero entries yields {@code "{}"}
 * (previously the unbalanced string <code>"{"</code> was returned in that case).
 *
 * @param vector     vector whose non-zero elements are rendered
 * @param dictionary labels indexed by element index, or {@code null} to print indices;
 *                   when non-null it must cover every non-zero index of {@code vector}
 * @return the brace-delimited, comma-separated rendering
 */
public static String vectorToSortedString(Vector vector, String[] dictionary) {
    final int maxEntries = 25;

    List<Pair<String, Double>> vectorValues = new ArrayList<Pair<String, Double>>(
            vector.getNumNondefaultElements());
    Iterator<Vector.Element> it = vector.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        String label = dictionary != null ? dictionary[e.index()] : String.valueOf(e.index());
        vectorValues.add(Pair.of(label, e.get()));
    }

    // Descending by value: the comparator compares y against x.
    Collections.sort(vectorValues, new Comparator<Pair<String, Double>>() {
        @Override
        public int compare(Pair<String, Double> x, Pair<String, Double> y) {
            return y.getSecond().compareTo(x.getSecond());
        }
    });

    StringBuilder bldr = new StringBuilder(2048);
    bldr.append('{');
    int limit = Math.min(maxEntries, vectorValues.size());
    for (int i = 0; i < limit; i++) {
        if (i > 0) {
            bldr.append(',');
        }
        Pair<String, Double> p = vectorValues.get(i);
        bldr.append(p.getFirst());
        bldr.append(':');
        bldr.append(p.getSecond());
    }
    // Always close the brace so that an empty vector produces "{}" rather than "{".
    bldr.append('}');
    return bldr.toString();
}
From source file:com.elex.dmp.lda.TopicModel.java
License:Apache License
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException { int numTopics = -1; int numTerms = -1; List<Pair<Integer, Vector>> rows = Lists.newArrayList(); for (Path modelPath : modelPaths) { for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf)) {//from w ww . j a v a 2s . c om rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext if (numTerms < 0) { numTerms = row.getSecond().get().size(); } } } if (rows.isEmpty()) { throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it"); } numTopics++; Matrix model = new DenseMatrix(numTopics, numTerms); Vector topicSums = new DenseVector(numTopics); for (Pair<Integer, Vector> pair : rows) { model.viewRow(pair.getFirst()).assign(pair.getSecond()); topicSums.set(pair.getFirst(), pair.getSecond().norm(1)); } return Pair.of(model, topicSums); }