Example usage for org.apache.mahout.cf.taste.impl.common FullRunningAverage FullRunningAverage

Introduction

On this page you can find example usage for the org.apache.mahout.cf.taste.impl.common.FullRunningAverage constructor, FullRunningAverage().

Prototype

public FullRunningAverage() 
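
To show the constructor in context before the longer examples below, here is a minimal sketch of the typical pattern: create an empty FullRunningAverage, feed it values with addDatum, optionally retract one with removeDatum, and read the result with getAverage. The class name FullRunningAverageExample and the sample values are made up for illustration.

import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;

public class FullRunningAverageExample {

    public static void main(String[] args) {
        // A FullRunningAverage starts out empty.
        RunningAverage average = new FullRunningAverage();

        // Accumulate a few illustrative ratings.
        average.addDatum(4.0);
        average.addDatum(3.5);
        average.addDatum(5.0);

        // A previously added value can be retracted.
        average.removeDatum(3.5);

        System.out.println("count   = " + average.getCount());   // 2
        System.out.println("average = " + average.getAverage()); // 4.5
    }
}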

Usage

From source file: GavaFactorizer.java

License: Apache License

double getAveragePreference() throws TasteException {
    RunningAverage average = new FullRunningAverage();
    LongPrimitiveIterator it = dataModel.getUserIDs();
    while (it.hasNext()) {
        for (Preference pref : dataModel.getPreferencesFromUser(it.nextLong())) {
            average.addDatum(pref.getValue());
        }
    }
    return average.getAverage();
}

From source file: com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java

License: Apache License

/**
 * small integration test that runs the full job
 *
 * <pre>
 *
 *  user-item-matrix
 *
 *          burger  hotdog  berries  icecream
 *  dog       5       5        2        -
 *  rabbit    2       -        3        5
 *  cow       -       5        -        3
 *  donkey    3       -        -        5
 *
 * </pre>
 */
@Test
public void completeJobToyExample() throws Exception {

    Double na = Double.NaN;
    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, na }),
                    new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }),
                    new DenseVector(new double[] { na, 5.0, na, 3.0 }),
                    new DenseVector(new double[] { 3.0, na, na, 5.0 }) });

    writeLines(inputFile, preferencesAsText(preferences));
    indexSizeFile.deleteOnExit();
    writeLines(indexSizeFile, "0,4\n1,4");

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;

    alsFactorization
            .run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
                    "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures",
                    String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations),
                    "--indexSizes", indexSizeFile.toString(), "--useTransform", "false" });
    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            preferences.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            preferences.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero();
        while (elementIterator.hasNext()) {
            Vector.Element e = elementIterator.next();
            if (!Double.isNaN(e.get())) {
                double pref = e.get();
                double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
                double err = pref - estimate;
                avg.addDatum(err * err);
                log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]",
                        new Object[] { slice.index(), e.index(), pref, estimate });
            }
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);

    assertTrue(rmse < 0.2);
}

From source file: com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java

License: Apache License

@Test
public void completeJobImplicitToyExample() throws Exception {

    Matrix observations = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
                    new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }),
                    new DenseVector(new double[] { 0, 5.0, 0, 3.0 }),
                    new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) });

    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
                    new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }),
                    new DenseVector(new double[] { 0, 1.0, 0, 1.0 }),
                    new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) });

    writeLines(inputFile, preferencesAsText(observations));
    writeLines(indexSizeFile, "0,4\n1,4");
    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;
    double alpha = 20;

    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output",
            outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda",
            String.valueOf(lambda), "--implicitFeedback", String.valueOf(true), "--alpha",
            String.valueOf(alpha), "--numFeatures", String.valueOf(numFeatures), "--numIterations",
            String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform",
            "false" });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            observations.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            observations.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nObservations - users x items\n");
    info.append(MathHelper.nice(observations));
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        for (Vector.Element e : slice.vector()) {
            if (!Double.isNaN(e.get())) {
                double pref = e.get();
                double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
                double confidence = 1 + alpha * observations.getQuick(slice.index(), e.index());
                double err = confidence * (pref - estimate) * (pref - estimate);
                avg.addDatum(err);
                log.info(
                        "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] "
                                + "estimate is [{}]",
                        new Object[] { slice.index(), e.index(), pref, confidence, estimate });
            }
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);

    assertTrue(rmse < 0.4);
}

From source file: de.tuberlin.dima.recsys.ssnmm.interactioncut.BiasedItemBasedRecommender.java

License: Apache License

public BiasedItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity, int k, double lambda2,
        double lambda3) throws TasteException {
    super(dataModel, similarity);
    this.k = k;
    this.similarity = similarity;

    RunningAverage averageRating = new FullRunningAverage();
    LongPrimitiveIterator itemIDs = getDataModel().getItemIDs();
    while (itemIDs.hasNext()) {
        for (Preference pref : getDataModel().getPreferencesForItem(itemIDs.next())) {
            averageRating.addDatum(pref.getValue());
        }
    }

    mu = averageRating.getAverage();

    itemBiases = new OpenLongDoubleHashMap(getDataModel().getNumItems());
    userBiases = new OpenLongDoubleHashMap(getDataModel().getNumUsers());

    itemIDs = getDataModel().getItemIDs();
    while (itemIDs.hasNext()) {
        long itemID = itemIDs.nextLong();
        PreferenceArray preferences = getDataModel().getPreferencesForItem(itemID);
        double sum = 0;
        for (Preference pref : preferences) {
            sum += pref.getValue() - mu;
        }
        double bi = sum / (lambda2 + preferences.length());
        itemBiases.put(itemID, bi);
    }

    LongPrimitiveIterator userIDs = getDataModel().getUserIDs();
    while (userIDs.hasNext()) {
        long userID = userIDs.nextLong();
        PreferenceArray preferences = getDataModel().getPreferencesFromUser(userID);
        double sum = 0;
        for (Preference pref : preferences) {
            sum += pref.getValue() - mu - itemBiases.get(pref.getItemID());
        }
        double bu = sum / (lambda3 + preferences.length());
        userBiases.put(userID, bu);
    }
}

From source file: de.tuberlin.dima.recsys.ssnmm.ratingprediction.AverageRating.java

License: Apache License

public static void main(String[] args) {

    File dir = new File("/home/ssc/Entwicklung/datasets/yahoo-songs/");

    File[] trainingFiles = dir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.startsWith("train_");
        }
    });

    Pattern sep = Pattern.compile("\t");

    RunningAverage avg = new FullRunningAverage();

    int ratingsProcessed = 0;
    for (File trainingFile : trainingFiles) {
        for (String line : Utils.readLines(trainingFile)) {
            int rating = Integer.parseInt(sep.split(line)[2]);
            avg.addDatum(rating);
            if (++ratingsProcessed % 10000000 == 0) {
                System.out.println(ratingsProcessed + " ratings processed");
            }
        }
    }
    System.out.println("average rating " + avg.getAverage());

}

From source file: de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java

License: Apache License

public static void main(String[] args) throws IOException {

    int numUsers = 1823179;
    int numItems = 136736;
    double mu = 3.157255412010664;

    String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
    String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
    String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
    String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
    String holdoutSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

    Matrix similarities = new SparseRowMatrix(numItems, numItems);

    System.out.println("Reading similarities...");
    int similaritiesRead = 0;
    Configuration conf = new Configuration();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {

        int item = pair.getFirst().get();
        Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();

        while (elements.hasNext()) {
            Vector.Element elem = elements.next();
            similarities.setQuick(item, elem.index(), elem.get());
            similaritiesRead++;
        }
    }
    System.out.println("Found " + similaritiesRead + " similarities");

    Pattern sep = Pattern.compile("\t");

    double[] itemBiases = new double[numItems];
    double[] userBiases = new double[numUsers];

    System.out.println("Reading item biases");
    for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
        String[] parts = sep.split(line);
        itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    System.out.println("Reading user biases");
    for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
        String[] parts = sep.split(line);
        userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
    Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

    int currentUser = 0;
    OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

    int usersProcessed = 0;
    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();

    RunningAverage rmseBase = new FullRunningAverage();
    RunningAverage maeBase = new FullRunningAverage();

    while (trainRatings.hasNext()) {
        Rating rating = trainRatings.next();
        if (rating.user() != currentUser) {

            for (int n = 0; n < 10; n++) {
                Rating heldOutRating = heldOutRatings.next();
                Preconditions.checkState(heldOutRating.user() == currentUser);

                double preference = 0.0;
                double totalSimilarity = 0.0;
                int count = 0;

                Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
                        .iterateNonZero();
                while (similarItems.hasNext()) {
                    Vector.Element similarity = similarItems.next();
                    int similarItem = similarity.index();
                    if (prefs.containsKey(similarItem)) {
                        preference += similarity.get() * (prefs.get(similarItem)
                                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
                        totalSimilarity += Math.abs(similarity.get());
                        count++;

                    }
                }

                double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
                double estimate = baselineEstimate;

                if (count > 1) {
                    estimate += preference / totalSimilarity;
                }

                double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
                maeBase.addDatum(baseError);
                rmseBase.addDatum(baseError * baseError);

                double error = Math.abs(heldOutRating.rating() - estimate);
                mae.addDatum(error);
                rmse.addDatum(error * error);

            }

            if (++usersProcessed % 10000 == 0) {
                System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
                        + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
                        + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
            }

            currentUser = rating.user();
            prefs.clear();

        }
        prefs.put(rating.item(), rating.rating());

    }

    System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE "
            + Math.sqrt(rmseBase.getAverage()));
}

From source file: de.tuberlin.dima.recsys.ssnmm.ratingprediction.UserItemBaseline.java

License: Apache License

void test() throws IOException {

    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();

    System.out.println("Calculating predictions");
    for (Rating rating : new RatingsIterable(tests)) {

        double error = Math.abs(rating.rating() - baselineEstimate(rating.user(), rating.item()));

        mae.addDatum(error);
        rmse.addDatum(error * error);
    }

    System.out.println("MAE " + mae.getAverage() + ", RMSE: " + Math.sqrt(rmse.getAverage()));
}

From source file: lib.eval.MAERecommenderEvaluator.java

License: Apache License

@Override
protected void reset() {
    average = new FullRunningAverage();
}

From source file: net.myrrix.online.candidate.LocationSensitiveHashTest.java

License: Apache License

@Test
public void testLSH() {
    System.setProperty("model.lsh.sampleRatio", "0.1");
    System.setProperty("model.lsh.numHashes", "20");
    RandomGenerator random = RandomManager.getRandom();

    RunningAverage avgPercentTopRecsConsidered = new FullRunningAverage();
    RunningAverage avgNDCG = new FullRunningAverage();
    RunningAverage avgPercentAllItemsConsidered = new FullRunningAverage();

    for (int iteration = 0; iteration < ITERATIONS; iteration++) {

        FastByIDMap<float[]> Y = new FastByIDMap<float[]>();
        for (int i = 0; i < NUM_ITEMS; i++) {
            Y.put(i, RandomUtils.randomUnitVector(NUM_FEATURES, random));
        }
        float[] userVec = RandomUtils.randomUnitVector(NUM_FEATURES, random);

        double[] results = doTestRandomVecs(Y, userVec);
        double percentTopRecsConsidered = results[0];
        double ndcg = results[1];
        double percentAllItemsConsidered = results[2];

        log.info("Considered {}% of all candidates, {} nDCG, got {}% recommendations correct",
                100 * percentAllItemsConsidered, ndcg, 100 * percentTopRecsConsidered);

        avgPercentTopRecsConsidered.addDatum(percentTopRecsConsidered);
        avgNDCG.addDatum(ndcg);
        avgPercentAllItemsConsidered.addDatum(percentAllItemsConsidered);
    }

    log.info(avgPercentTopRecsConsidered.toString());
    log.info(avgNDCG.toString());
    log.info(avgPercentAllItemsConsidered.toString());

    assertTrue(avgPercentTopRecsConsidered.getAverage() > 0.55);
    assertTrue(avgNDCG.getAverage() > 0.55);
    assertTrue(avgPercentAllItemsConsidered.getAverage() < 0.075);
}

From source file: net.ufida.info.mahout.common.MemoryDiffStorage.java

License: Apache License

private RunningAverage buildRunningAverage() {
    return stdDevWeighted ? new FullRunningAverageAndStdDev() : new FullRunningAverage();
}