List of usage examples for the org.apache.mahout.cf.taste.impl.common.FullRunningAverage constructor FullRunningAverage()
public FullRunningAverage()
From source file:GavaFactorizer.java
License:Apache License
double getAveragePreference() throws TasteException { RunningAverage average = new FullRunningAverage(); LongPrimitiveIterator it = dataModel.getUserIDs(); while (it.hasNext()) { for (Preference pref : dataModel.getPreferencesFromUser(it.nextLong())) { average.addDatum(pref.getValue()); }// w ww . jav a 2 s . c o m } return average.getAverage(); }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/** * small integration test that runs the full job * * <pre>/* w w w .jav a2s . c o m*/ * * user-item-matrix * * burger hotdog berries icecream * dog 5 5 2 - * rabbit 2 - 3 5 * cow - 5 - 3 * donkey 3 - - 5 * * </pre> */ @Test public void completeJobToyExample() throws Exception { Double na = Double.NaN; Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, na }), new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }), new DenseVector(new double[] { na, 5.0, na, 3.0 }), new DenseVector(new double[] { 3.0, na, na, 5.0 }) }); writeLines(inputFile, preferencesAsText(preferences)); indexSizeFile.deleteOnExit(); writeLines(indexSizeFile, "0,4\n1,4"); ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf); int numFeatures = 3; int numIterations = 5; double lambda = 0.065; alsFactorization .run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform", "false" }); Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"), preferences.numRows(), numFeatures); Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"), preferences.numCols(), numFeatures); StringBuilder info = new StringBuilder(); info.append("\nA - users x items\n\n"); info.append(MathHelper.nice(preferences)); info.append("\nU - users x features\n\n"); info.append(MathHelper.nice(u)); info.append("\nM - items x features\n\n"); info.append(MathHelper.nice(m)); Matrix Ak = u.times(m.transpose()); info.append("\nAk - users x items\n\n"); info.append(MathHelper.nice(Ak)); info.append('\n'); log.info(info.toString()); RunningAverage avg = new FullRunningAverage(); 
Iterator<MatrixSlice> sliceIterator = preferences.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero(); while (elementIterator.hasNext()) { Vector.Element e = elementIterator.next(); if (!Double.isNaN(e.get())) { double pref = e.get(); double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index())); double err = pref - estimate; avg.addDatum(err * err); log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]", new Object[] { slice.index(), e.index(), pref, estimate }); } } } double rmse = Math.sqrt(avg.getAverage()); log.info("RMSE: {}", rmse); assertTrue(rmse < 0.2); }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/**
 * Integration test that runs the full job in implicit-feedback mode on a 4x4 toy matrix.
 *
 * <p>{@code observations} holds the raw interaction strengths that are written to the
 * job input; {@code preferences} is the binary matrix (1.0 where an observation is
 * non-zero) that the factorization is compared against. Each squared error is weighted
 * by the confidence {@code 1 + alpha * observation}, and the test asserts that the
 * square root of the mean weighted error is below 0.4.
 */
@Test
public void completeJobImplicitToyExample() throws Exception {

  Matrix observations = new SparseRowMatrix(4, 4, new Vector[] {
      new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
      new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }),
      new DenseVector(new double[] { 0, 5.0, 0, 3.0 }),
      new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) });

  Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] {
      new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
      new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }),
      new DenseVector(new double[] { 0, 1.0, 0, 1.0 }),
      new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) });

  writeLines(inputFile, preferencesAsText(observations));
  writeLines(indexSizeFile, "0,4\n1,4");

  ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
  alsFactorization.setConf(conf);

  int numFeatures = 3;
  int numIterations = 5;
  double lambda = 0.065;
  double alpha = 20;

  alsFactorization.run(new String[] {
      "--input", inputFile.getAbsolutePath(),
      "--output", outputDir.getAbsolutePath(),
      "--tempDir", tmpDir.getAbsolutePath(),
      "--lambda", String.valueOf(lambda),
      "--implicitFeedback", String.valueOf(true),
      "--alpha", String.valueOf(alpha),
      "--numFeatures", String.valueOf(numFeatures),
      "--numIterations", String.valueOf(numIterations),
      "--indexSizes", indexSizeFile.toString(),
      "--useTransform", "false" });

  Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
      observations.numRows(), numFeatures);
  Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
      observations.numCols(), numFeatures);

  StringBuilder info = new StringBuilder();
  info.append("\nObservations - users x items\n");
  info.append(MathHelper.nice(observations));
  info.append("\nA - users x items\n\n");
  info.append(MathHelper.nice(preferences));
  info.append("\nU - users x features\n\n");
  info.append(MathHelper.nice(u));
  info.append("\nM - items x features\n\n");
  info.append(MathHelper.nice(m));

  // Ak is the reconstruction of the preference matrix from the two factor matrices.
  Matrix Ak = u.times(m.transpose());
  info.append("\nAk - users x items\n\n");
  info.append(MathHelper.nice(Ak));
  info.append('\n');

  log.info(info.toString());

  RunningAverage avg = new FullRunningAverage();
  Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
  while (sliceIterator.hasNext()) {
    MatrixSlice slice = sliceIterator.next();
    for (Vector.Element e : slice.vector()) {
      if (!Double.isNaN(e.get())) {
        double pref = e.get();
        double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
        double confidence = 1 + alpha * observations.getQuick(slice.index(), e.index());
        // The squared error is scaled by the confidence before it is averaged.
        double err = confidence * (pref - estimate) * (pref - estimate);
        avg.addDatum(err);
        log.info(
            "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] "
                + "estimate is [{}]",
            new Object[] { slice.index(), e.index(), pref, confidence, estimate });
      }
    }
  }

  // Named "rmse" but, because of the weighting above, this is the root of the mean
  // confidence-weighted squared error.
  double rmse = Math.sqrt(avg.getAverage());
  log.info("RMSE: {}", rmse);

  assertTrue(rmse < 0.4);
}
From source file:de.tuberlin.dima.recsys.ssnmm.interactioncut.BiasedItemBasedRecommender.java
License:Apache License
public BiasedItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity, int k, double lambda2, double lambda3) throws TasteException { super(dataModel, similarity); this.k = k;//from www. j av a2 s. c om this.similarity = similarity; RunningAverage averageRating = new FullRunningAverage(); LongPrimitiveIterator itemIDs = getDataModel().getItemIDs(); while (itemIDs.hasNext()) { for (Preference pref : getDataModel().getPreferencesForItem(itemIDs.next())) { averageRating.addDatum(pref.getValue()); } } mu = averageRating.getAverage(); itemBiases = new OpenLongDoubleHashMap(getDataModel().getNumItems()); userBiases = new OpenLongDoubleHashMap(getDataModel().getNumUsers()); itemIDs = getDataModel().getItemIDs(); while (itemIDs.hasNext()) { long itemID = itemIDs.nextLong(); PreferenceArray preferences = getDataModel().getPreferencesForItem(itemID); double sum = 0; for (Preference pref : preferences) { sum += pref.getValue() - mu; } double bi = sum / (lambda2 + preferences.length()); itemBiases.put(itemID, bi); } LongPrimitiveIterator userIDs = getDataModel().getUserIDs(); while (userIDs.hasNext()) { long userID = userIDs.nextLong(); PreferenceArray preferences = getDataModel().getPreferencesFromUser(userID); double sum = 0; for (Preference pref : preferences) { sum += pref.getValue() - mu - itemBiases.get(pref.getItemID()); } double bu = sum / (lambda3 + preferences.length()); userBiases.put(userID, bu); } }
From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.AverageRating.java
License:Apache License
public static void main(String[] args) { File dir = new File("/home/ssc/Entwicklung/datasets/yahoo-songs/"); File[] trainingFiles = dir.listFiles(new FilenameFilter() { @Override//from w w w . j a v a 2s. com public boolean accept(File dir, String name) { return name.startsWith("train_"); } }); Pattern sep = Pattern.compile("\t"); RunningAverage avg = new FullRunningAverage(); int ratingsProcessed = 0; for (File trainingFile : trainingFiles) { for (String line : Utils.readLines(trainingFile)) { int rating = Integer.parseInt(sep.split(line)[2]); avg.addDatum(rating); if (++ratingsProcessed % 10000000 == 0) { System.out.println(ratingsProcessed + " ratings processed"); } } } System.out.println("average rating " + avg.getAverage()); }
From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java
License:Apache License
public static void main(String[] args) throws IOException { int numUsers = 1823179; int numItems = 136736; double mu = 3.157255412010664; String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/"; String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv"; String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv"; String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv"; String holdoutSetPath = "home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv"; Matrix similarities = new SparseRowMatrix(numItems, numItems); System.out.println("Reading similarities..."); int similaritiesRead = 0; Configuration conf = new Configuration(); for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>( new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) { int item = pair.getFirst().get(); Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero(); while (elements.hasNext()) { Vector.Element elem = elements.next(); similarities.setQuick(item, elem.index(), elem.get()); similaritiesRead++;//from w ww. ja v a 2 s . 
c o m } } System.out.println("Found " + similaritiesRead + " similarities"); Pattern sep = Pattern.compile("\t"); double[] itemBiases = new double[numItems]; double[] userBiases = new double[numUsers]; System.out.println("Reading item biases"); for (String line : new FileLineIterable(new File(itemBiasesFilePath))) { String[] parts = sep.split(line); itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]); } System.out.println("Reading user biases"); for (String line : new FileLineIterable(new File(userBiasesFilePath))) { String[] parts = sep.split(line); userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]); } Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator(); Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator(); int currentUser = 0; OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap(); int usersProcessed = 0; RunningAverage rmse = new FullRunningAverage(); RunningAverage mae = new FullRunningAverage(); RunningAverage rmseBase = new FullRunningAverage(); RunningAverage maeBase = new FullRunningAverage(); while (trainRatings.hasNext()) { Rating rating = trainRatings.next(); if (rating.user() != currentUser) { for (int n = 0; n < 10; n++) { Rating heldOutRating = heldOutRatings.next(); Preconditions.checkState(heldOutRating.user() == currentUser); double preference = 0.0; double totalSimilarity = 0.0; int count = 0; Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item()) .iterateNonZero(); while (similarItems.hasNext()) { Vector.Element similarity = similarItems.next(); int similarItem = similarity.index(); if (prefs.containsKey(similarItem)) { preference += similarity.get() * (prefs.get(similarItem) - (mu + userBiases[currentUser] + itemBiases[similarItem])); totalSimilarity += Math.abs(similarity.get()); count++; } } double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()]; double estimate = 
baselineEstimate; if (count > 1) { estimate += preference / totalSimilarity; } double baseError = Math.abs(heldOutRating.rating() - baselineEstimate); maeBase.addDatum(baseError); rmseBase.addDatum(baseError * baseError); double error = Math.abs(heldOutRating.rating() - estimate); mae.addDatum(error); rmse.addDatum(error * error); } if (++usersProcessed % 10000 == 0) { System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE " + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage())); } currentUser = rating.user(); prefs.clear(); } prefs.put(rating.item(), rating.rating()); } System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE " + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage())); }
From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.UserItemBaseline.java
License:Apache License
void test() throws IOException { RunningAverage rmse = new FullRunningAverage(); RunningAverage mae = new FullRunningAverage(); System.out.println("Calculating predictions"); for (Rating rating : new RatingsIterable(tests)) { double error = Math.abs(rating.rating() - baselineEstimate(rating.user(), rating.item())); mae.addDatum(error);//from w ww . j ava 2 s . c o m rmse.addDatum(error * error); } System.out.println("MAE " + mae.getAverage() + ", RMSE: " + Math.sqrt(rmse.getAverage())); }
From source file:lib.eval.MAERecommenderEvaluator.java
License:Apache License
/**
 * Discards the accumulated data by replacing {@code average} with a fresh, empty
 * {@link FullRunningAverage}.
 */
@Override
protected void reset() {
  this.average = new FullRunningAverage();
}
From source file:net.myrrix.online.candidate.LocationSensitiveHashTest.java
License:Apache License
@Test public void testLSH() { System.setProperty("model.lsh.sampleRatio", "0.1"); System.setProperty("model.lsh.numHashes", "20"); RandomGenerator random = RandomManager.getRandom(); RunningAverage avgPercentTopRecsConsidered = new FullRunningAverage(); RunningAverage avgNDCG = new FullRunningAverage(); RunningAverage avgPercentAllItemsConsidered = new FullRunningAverage(); for (int iteration = 0; iteration < ITERATIONS; iteration++) { FastByIDMap<float[]> Y = new FastByIDMap<float[]>(); for (int i = 0; i < NUM_ITEMS; i++) { Y.put(i, RandomUtils.randomUnitVector(NUM_FEATURES, random)); }// w w w . j ava 2 s .c o m float[] userVec = RandomUtils.randomUnitVector(NUM_FEATURES, random); double[] results = doTestRandomVecs(Y, userVec); double percentTopRecsConsidered = results[0]; double ndcg = results[1]; double percentAllItemsConsidered = results[2]; log.info("Considered {}% of all candidates, {} nDCG, got {}% recommendations correct", 100 * percentAllItemsConsidered, ndcg, 100 * percentTopRecsConsidered); avgPercentTopRecsConsidered.addDatum(percentTopRecsConsidered); avgNDCG.addDatum(ndcg); avgPercentAllItemsConsidered.addDatum(percentAllItemsConsidered); } log.info(avgPercentTopRecsConsidered.toString()); log.info(avgNDCG.toString()); log.info(avgPercentAllItemsConsidered.toString()); assertTrue(avgPercentTopRecsConsidered.getAverage() > 0.55); assertTrue(avgNDCG.getAverage() > 0.55); assertTrue(avgPercentAllItemsConsidered.getAverage() < 0.075); }
From source file:net.ufida.info.mahout.common.MemoryDiffStorage.java
License:Apache License
/**
 * Creates the running-average implementation selected by {@code stdDevWeighted}.
 *
 * @return a {@link FullRunningAverageAndStdDev} when {@code stdDevWeighted} is set,
 *         otherwise a plain {@link FullRunningAverage}
 */
private RunningAverage buildRunningAverage() {
  if (stdDevWeighted) {
    return new FullRunningAverageAndStdDev();
  }
  return new FullRunningAverage();
}