List of usage examples for org.apache.mahout.math MatrixSlice index
int index
To view the source code for org.apache.mahout.math MatrixSlice index, click the Source Link.
Click Source Link
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Computes the L1 norm (sum of absolute values) of each row of {@code m}.
 *
 * @param m the matrix whose row sums are required
 * @return a dense vector whose i-th entry is the L1 norm of row i of {@code m}
 */
private static Vector viewRowSums(Matrix m) {
    Vector rowSums = new DenseVector(m.numRows());
    for (MatrixSlice row : m) {
        rowSums.set(row.index(), row.vector().norm(1));
    }
    return rowSums;
}
From source file:com.elex.dmp.lda.CachingCVB0Mapper.java
License:Apache License
/**
 * Flushes the trained model at the end of the map task: stops the model
 * trainer, then emits each topic (one row of the model matrix) as a
 * (topic-id text key, topic-term vector) output pair.
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    log.info("Stopping model trainer");
    modelTrainer.stop();
    log.info("Writing model");
    TopicModel model = modelTrainer.getReadModel();
    for (MatrixSlice topic : model) {
        Text topicId = new Text(Integer.toString(topic.index()));
        context.write(topicId, new VectorWritable(topic.vector()));
    }
}
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
public double calculatePerplexity(VectorIterable matrix, VectorIterable docTopicCounts, double testFraction) { Iterator<MatrixSlice> docIterator = matrix.iterator(); Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator(); double perplexity = 0; double matrixNorm = 0; while (docIterator.hasNext() && docTopicIterator.hasNext()) { MatrixSlice docSlice = docIterator.next(); MatrixSlice topicSlice = docTopicIterator.next(); int docId = docSlice.index(); Vector document = docSlice.vector(); Vector topicDist = topicSlice.vector(); if (testFraction == 0 || docId % (1 / testFraction) == 0) { trainSync(document, topicDist, false, 10); perplexity += readModel.perplexity(document, topicDist); matrixNorm += document.norm(1); }//from w ww . jav a 2s . com } return perplexity / matrixNorm; }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Small integration test that runs the full ALS factorization job on a toy
 * user-item matrix and checks the reconstruction error.
 *
 * <pre>
 *          burger  hotdog  berries  icecream
 *  dog       5       5       2        -
 *  rabbit    2       -       3        5
 *  cow       -       5       -        3
 *  donkey    3       -       -        5
 * </pre>
 */
@Test
public void completeJobToyExample() throws Exception {
    Double na = Double.NaN;
    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, na }),
                    new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }),
                    new DenseVector(new double[] { na, 5.0, na, 3.0 }),
                    new DenseVector(new double[] { 3.0, na, na, 5.0 }) });
    writeLines(inputFile, preferencesAsText(preferences));
    indexSizeFile.deleteOnExit();
    writeLines(indexSizeFile, "0,4\n1,4");

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);
    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;
    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output",
            outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda",
            String.valueOf(lambda), "--numFeatures", String.valueOf(numFeatures), "--numIterations",
            String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform",
            "false" });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            preferences.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            preferences.numCols(), numFeatures);

    // Dump A, U, M and the reconstruction Ak = U * M' for manual inspection of the log.
    StringBuilder info = new StringBuilder();
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');
    log.info(info.toString());

    // RMSE over the known (non-NaN) preferences only.
    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> rows = preferences.iterateAll();
    while (rows.hasNext()) {
        MatrixSlice row = rows.next();
        Iterator<Vector.Element> cells = row.vector().iterateNonZero();
        while (cells.hasNext()) {
            Vector.Element cell = cells.next();
            if (Double.isNaN(cell.get())) {
                continue;
            }
            double pref = cell.get();
            double estimate = u.viewRow(row.index()).dot(m.viewRow(cell.index()));
            double err = pref - estimate;
            avg.addDatum(err * err);
            log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]",
                    new Object[] { row.index(), cell.index(), pref, estimate });
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);
    assertTrue(rmse < 0.2);
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Integration test for the implicit-feedback variant of the ALS job: raw
 * observation counts are factorized, and the reconstruction is scored against
 * binary preferences weighted by a confidence of {@code 1 + alpha * observation}.
 */
@Test
public void completeJobImplicitToyExample() throws Exception {
    Matrix observations = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
                    new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }),
                    new DenseVector(new double[] { 0, 5.0, 0, 3.0 }),
                    new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) });
    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
                    new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }),
                    new DenseVector(new double[] { 0, 1.0, 0, 1.0 }),
                    new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) });
    writeLines(inputFile, preferencesAsText(observations));
    writeLines(indexSizeFile, "0,4\n1,4");

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);
    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;
    double alpha = 20;
    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output",
            outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda",
            String.valueOf(lambda), "--implicitFeedback", String.valueOf(true), "--alpha",
            String.valueOf(alpha), "--numFeatures", String.valueOf(numFeatures), "--numIterations",
            String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform",
            "false" });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            observations.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            observations.numCols(), numFeatures);

    // Dump all matrices involved for manual inspection of the log.
    StringBuilder info = new StringBuilder();
    info.append("\nObservations - users x items\n");
    info.append(MathHelper.nice(observations));
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');
    log.info(info.toString());

    // Confidence-weighted RMSE. Note: every cell is scored (zeros included),
    // since zero preferences carry information in the implicit-feedback setting.
    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> rows = preferences.iterateAll();
    while (rows.hasNext()) {
        MatrixSlice row = rows.next();
        for (Vector.Element cell : row.vector()) {
            if (Double.isNaN(cell.get())) {
                continue;
            }
            double pref = cell.get();
            double estimate = u.viewRow(row.index()).dot(m.viewRow(cell.index()));
            double confidence = 1 + alpha * observations.getQuick(row.index(), cell.index());
            double err = confidence * (pref - estimate) * (pref - estimate);
            avg.addDatum(err);
            log.info(
                    "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] "
                            + "estimate is [{}]",
                    new Object[] { row.index(), cell.index(), pref, confidence, estimate });
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);
    assertTrue(rmse < 0.4);
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Serializes the non-zero, non-NaN cells of {@code preferences} as
 * newline-separated {@code row,col,value} triples (no trailing newline).
 *
 * @param preferences matrix to serialize
 * @return textual form suitable as job input
 */
protected static String preferencesAsText(Matrix preferences) {
    StringBuilder out = new StringBuilder();
    String delimiter = "";
    Iterator<MatrixSlice> rows = preferences.iterateAll();
    while (rows.hasNext()) {
        MatrixSlice row = rows.next();
        Iterator<Vector.Element> cells = row.vector().iterateNonZero();
        while (cells.hasNext()) {
            Vector.Element cell = cells.next();
            double value = cell.get();
            if (Double.isNaN(value)) {
                continue;
            }
            out.append(delimiter).append(row.index()).append(',').append(cell.index()).append(',')
                    .append(value);
            // Only separate after the first emitted triple, so there is no leading newline.
            delimiter = "\n";
        }
    }
    return out.toString();
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/*** * If the MapDir matrix is small, we can convert it to an in memory representation * and then run efficient centralized operations * /* ww w . j av a 2s . c o m*/ * @param origMtx in MapDir format (generated by MatrixOutputFormat) * @return a dense matrix including the data * @throws IOException */ public static DenseMatrix toDenseMatrix(DistributedRowMatrix origMtx) throws IOException { MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath()); DenseMatrix mtx = new DenseMatrix(origMtx.numRows(), origMtx.numCols()); Iterator<MatrixSlice> sliceIterator; try { sliceIterator = mapDir.iterateAll(); } catch (Exception e) { log.info(e.toString()); log.info("Input is not in matrix format, trying SequenceFileFormat instead ..."); sliceIterator = origMtx.iterateAll(); } while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); // int r = slice.index(); // for (int c = 0; c < mtx.numCols(); c++) { // mtx.set(r, c, slice.get(c)); // } mtx.viewRow(slice.index()).assign(slice.vector()); } mapDir.close(); return mtx; }
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * If the MapDir matrix is small, we can convert it to an in-memory
 * representation and then run efficient centralized operations.
 *
 * @param origMtx in MapDir format (generated by MatrixOutputFormat)
 * @return a sparse matrix including the data
 * @throws IOException if the MapDir cannot be opened or read
 */
static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    // try/finally so the MapDir is closed even if iteration throws (was leaked before).
    try {
        SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols());
        Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
        while (sliceIterator.hasNext()) {
            MatrixSlice slice = sliceIterator.next();
            mtx.viewRow(slice.index()).assign(slice.vector());
        }
        return mtx;
    } finally {
        mapDir.close();
    }
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/** * Trace of a matrix obtained in a centralized way. For some reason, which I did not have time to debug, raises memory exception for big matrices. * /*from w w w . ja va 2s.c om*/ * TODO: MapReduce job for traces of big matrices. * @param origMtx * @return trace of the input matrix * @throws IOException */ static double trace(DistributedRowMatrix origMtx) throws IOException { MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath()); Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll(); double trace = 0; while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); int index = slice.index(); if (index >= slice.vector().size()) break; double value = slice.vector().get(index); trace += Double.isNaN(value) ? 0 : value; } mapDir.close(); return trace; }
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
public static void testIterator(DistributedRowMatrix origMtx, Path inPath) throws IOException { Configuration conf = new Configuration(); MapDir mapDir = new MapDir(conf, inPath); Iterator<MatrixSlice> sliceIterator = origMtx.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); int index = slice.index(); System.out.println("A[" + index + "] = " + slice.vector()); IntWritable key = new IntWritable(index); VectorWritable vw = new VectorWritable(); vw = mapDir.get(key, vw);// w w w .j av a 2 s. com System.out.println("B[" + index + "] = " + vw); } mapDir.close(); }