List of usage examples for the org.apache.mahout.math.SparseRowMatrix constructor
public SparseRowMatrix(int rows, int columns, boolean randomAccess)
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
public void batchTrain(Map<Vector, Vector> batch, boolean update, int numDocTopicsIters) { while (true) { try {// w ww . jav a 2 s . c o m List<TrainerRunnable> runnables = Lists.newArrayList(); for (Map.Entry<Vector, Vector> entry : batch.entrySet()) { runnables.add(new TrainerRunnable(readModel, null, entry.getKey(), entry.getValue(), new SparseRowMatrix(numTopics, numTerms, true), numDocTopicsIters)); } threadPool.invokeAll(runnables); if (update) { for (TrainerRunnable runnable : runnables) { writeModel.update(runnable.docTopicModel); } } break; } catch (InterruptedException e) { log.warn("Interrupted during batch training, retrying!", e); } } }
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
/**
 * Queues a single document for training by putting a new {@code TrainerRunnable} on
 * {@code workQueue}.
 *
 * <p>The runnable receives {@code writeModel} only when {@code update} is {@code true};
 * otherwise {@code null} is passed in its place. The put is retried until it succeeds. If the
 * calling thread is interrupted while waiting, the attempt is repeated and the thread's
 * interrupt status is restored before returning, so the interruption stays visible to callers.
 *
 * @param document         document vector forwarded to the runnable
 * @param docTopicCounts   doc-topic vector forwarded to the runnable
 * @param update           whether the runnable is given the write model
 * @param numDocTopicIters iteration count forwarded to the runnable
 */
public void train(Vector document, Vector docTopicCounts, boolean update, int numDocTopicIters) {
  // The interrupt is restored only after the put succeeds; restoring it inside the loop would
  // make the next blocking put throw again straight away.
  boolean interrupted = false;
  try {
    while (true) {
      try {
        workQueue.put(new TrainerRunnable(readModel, update ? writeModel : null, document, docTopicCounts,
            new SparseRowMatrix(numTopics, numTerms, true), numDocTopicIters));
        return;
      } catch (InterruptedException e) {
        interrupted = true;
        log.warn("Interrupted waiting to submit document to work queue: " + document, e);
      }
    }
  } finally {
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }
}
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
/**
 * Runs one {@code TrainerRunnable} for the given document directly on the calling thread.
 *
 * <p>The runnable is given {@code writeModel} when {@code update} is {@code true} and
 * {@code null} otherwise, together with a freshly allocated {@code numTopics x numTerms}
 * {@link SparseRowMatrix}.
 *
 * @param document         document vector forwarded to the runnable
 * @param docTopicCounts   doc-topic vector forwarded to the runnable
 * @param update           whether the runnable is given the write model
 * @param numDocTopicIters iteration count forwarded to the runnable
 */
public void trainSync(Vector document, Vector docTopicCounts, boolean update, int numDocTopicIters) {
  SparseRowMatrix topicTermMatrix = new SparseRowMatrix(numTopics, numTerms, true);
  TrainerRunnable task = new TrainerRunnable(readModel, update ? writeModel : null, document, docTopicCounts,
      topicTermMatrix, numDocTopicIters);
  task.run();
}
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
/**
 * Builds a {@code TrainerRunnable} for the document without a write model ({@code null}) and
 * returns the value produced by its {@code call()} method.
 *
 * @param document         document vector forwarded to the runnable
 * @param docTopicCounts   doc-topic vector forwarded to the runnable
 * @param numDocTopicIters iteration count forwarded to the runnable
 * @return the result of {@code TrainerRunnable.call()} (presumably the document's perplexity,
 *         going by this method's name)
 */
public double calculatePerplexity(Vector document, Vector docTopicCounts, int numDocTopicIters) {
  SparseRowMatrix topicTermMatrix = new SparseRowMatrix(numTopics, numTerms, true);
  return new TrainerRunnable(readModel, null, document, docTopicCounts, topicTermMatrix, numDocTopicIters)
      .call();
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Small integration test that runs the full job.
 *
 * <pre>
 *
 *  user-item-matrix
 *
 *          burger  hotdog  berries  icecream
 *  dog       5       5        2        -
 *  rabbit    2       -        3        5
 *  cow       -       5        -        3
 *  donkey    3       -        -        5
 *
 * </pre>
 *
 * Missing ratings ("-") are encoded as {@code NaN} in the preference matrix. After the job has
 * run, the factor matrices U and M are read back, and the root of the mean squared error over
 * all non-NaN preferences must be below 0.2.
 */
@Test
public void completeJobToyExample() throws Exception {
  // Primitive on purpose: the value is only ever used inside double[] initialisers.
  double na = Double.NaN;
  Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] {
      new DenseVector(new double[] { 5.0, 5.0, 2.0, na }),
      new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }),
      new DenseVector(new double[] { na, 5.0, na, 3.0 }),
      new DenseVector(new double[] { 3.0, na, na, 5.0 }) });

  writeLines(inputFile, preferencesAsText(preferences));
  indexSizeFile.deleteOnExit();
  writeLines(indexSizeFile, "0,4\n1,4");

  ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
  alsFactorization.setConf(conf);

  int numFeatures = 3;
  int numIterations = 5;
  double lambda = 0.065;

  alsFactorization.run(new String[] {
      "--input", inputFile.getAbsolutePath(),
      "--output", outputDir.getAbsolutePath(),
      "--tempDir", tmpDir.getAbsolutePath(),
      "--lambda", String.valueOf(lambda),
      "--numFeatures", String.valueOf(numFeatures),
      "--numIterations", String.valueOf(numIterations),
      "--indexSizes", indexSizeFile.toString(),
      "--useTransform", "false" });

  Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
      preferences.numRows(), numFeatures);
  Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
      preferences.numCols(), numFeatures);

  StringBuilder info = new StringBuilder();
  info.append("\nA - users x items\n\n");
  info.append(MathHelper.nice(preferences));
  info.append("\nU - users x features\n\n");
  info.append(MathHelper.nice(u));
  info.append("\nM - items x features\n\n");
  info.append(MathHelper.nice(m));

  Matrix Ak = u.times(m.transpose());
  info.append("\nAk - users x items\n\n");
  info.append(MathHelper.nice(Ak));
  info.append('\n');

  log.info(info.toString());

  RunningAverage avg = new FullRunningAverage();
  for (Iterator<MatrixSlice> slices = preferences.iterateAll(); slices.hasNext();) {
    MatrixSlice slice = slices.next();
    for (Iterator<Vector.Element> elements = slice.vector().iterateNonZero(); elements.hasNext();) {
      Vector.Element e = elements.next();
      // NaN marks a missing rating; only known preferences contribute to the error.
      if (!Double.isNaN(e.get())) {
        double pref = e.get();
        double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
        double err = pref - estimate;
        avg.addDatum(err * err);
        log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]",
            new Object[] { slice.index(), e.index(), pref, estimate });
      }
    }
  }

  double rmse = Math.sqrt(avg.getAverage());
  log.info("RMSE: {}", rmse);

  assertTrue(rmse < 0.2);
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Integration test that runs the full job in implicit-feedback mode on a 4x4 toy example.
 *
 * <p>{@code observations} holds the raw interaction strengths written to the job input;
 * {@code preferences} is the matching 0/1 matrix the factorization is compared against. After
 * the job has run, U and M are read back and every cell's squared error is weighted by
 * {@code confidence = 1 + alpha * observation}. The square root of the mean weighted error
 * (logged as "RMSE") must be below 0.4.
 */
@Test
public void completeJobImplicitToyExample() throws Exception {
  Matrix observations = new SparseRowMatrix(4, 4, new Vector[] {
      new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
      new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }),
      new DenseVector(new double[] { 0, 5.0, 0, 3.0 }),
      new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) });

  Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] {
      new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
      new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }),
      new DenseVector(new double[] { 0, 1.0, 0, 1.0 }),
      new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) });

  writeLines(inputFile, preferencesAsText(observations));
  // Same cleanup registration that completeJobToyExample performs for the index-size file.
  indexSizeFile.deleteOnExit();
  writeLines(indexSizeFile, "0,4\n1,4");

  ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
  alsFactorization.setConf(conf);

  int numFeatures = 3;
  int numIterations = 5;
  double lambda = 0.065;
  double alpha = 20;

  alsFactorization.run(new String[] {
      "--input", inputFile.getAbsolutePath(),
      "--output", outputDir.getAbsolutePath(),
      "--tempDir", tmpDir.getAbsolutePath(),
      "--lambda", String.valueOf(lambda),
      "--implicitFeedback", String.valueOf(true),
      "--alpha", String.valueOf(alpha),
      "--numFeatures", String.valueOf(numFeatures),
      "--numIterations", String.valueOf(numIterations),
      "--indexSizes", indexSizeFile.toString(),
      "--useTransform", "false" });

  Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
      observations.numRows(), numFeatures);
  Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
      observations.numCols(), numFeatures);

  StringBuilder info = new StringBuilder();
  info.append("\nObservations - users x items\n");
  info.append(MathHelper.nice(observations));
  info.append("\nA - users x items\n\n");
  info.append(MathHelper.nice(preferences));
  info.append("\nU - users x features\n\n");
  info.append(MathHelper.nice(u));
  info.append("\nM - items x features\n\n");
  info.append(MathHelper.nice(m));

  Matrix Ak = u.times(m.transpose());
  info.append("\nAk - users x items\n\n");
  info.append(MathHelper.nice(Ak));
  info.append('\n');

  log.info(info.toString());

  RunningAverage avg = new FullRunningAverage();
  for (Iterator<MatrixSlice> slices = preferences.iterateAll(); slices.hasNext();) {
    MatrixSlice slice = slices.next();
    // Iterates every cell of the row, zeros included, unlike the explicit-feedback test.
    for (Vector.Element e : slice.vector()) {
      if (!Double.isNaN(e.get())) {
        double pref = e.get();
        double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
        double confidence = 1 + alpha * observations.getQuick(slice.index(), e.index());
        double err = confidence * (pref - estimate) * (pref - estimate);
        avg.addDatum(err);
        log.info(
            "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] "
                + "estimate is [{}]",
            new Object[] { slice.index(), e.index(), pref, confidence, estimate });
      }
    }
  }

  // Note: this is the root of the mean confidence-weighted squared error, logged under "RMSE".
  double rmse = Math.sqrt(avg.getAverage());
  log.info("RMSE: {}", rmse);

  assertTrue(rmse < 0.4);
}