Example usage for org.apache.mahout.cf.taste.eval DataModelBuilder buildDataModel

List of usage examples for org.apache.mahout.cf.taste.eval DataModelBuilder buildDataModel

Introduction

In this page you can find the example usage for org.apache.mahout.cf.taste.eval DataModelBuilder buildDataModel.

Prototype

DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData);

Source Link

Document

Builds a DataModel implementation to be used in an evaluation, given training data.

Usage

From source file:lib.eval.AbstractRecommenderEvaluator.java

License:Apache License

@Override
public double evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder,
        DataModel dataModel, double trainingPercentage, double evaluationPercentage) throws TasteException {
    // Fail fast on invalid arguments before touching the data model.
    Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    Preconditions.checkArgument(dataModel != null, "dataModel is null");
    Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
            "Invalid trainingPercentage: " + trainingPercentage);
    Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
            "Invalid evaluationPercentage: " + evaluationPercentage);

    log.info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);

    // Pre-size both partitions for the expected number of sampled users.
    int userCount = dataModel.getNumUsers();
    int expectedSampledUsers = 1 + (int) (evaluationPercentage * userCount);
    FastByIDMap<PreferenceArray> trainingPrefsByUser = new FastByIDMap<>(expectedSampledUsers);
    FastByIDMap<PreferenceArray> testPrefsByUser = new FastByIDMap<>(expectedSampledUsers);

    LongPrimitiveIterator userIds = dataModel.getUserIDs();
    while (userIds.hasNext()) {
        long userId = userIds.nextLong();
        if (random.nextDouble() < evaluationPercentage) {
            // Sampled user: split their preferences between training and test.
            processOneUser(trainingPercentage, trainingPrefsByUser, testPrefsByUser, userId, dataModel);
        } else {
            // Not sampled for evaluation: contribute every preference to training.
            trainingPrefsByUser.put(userId, dataModel.getPreferencesFromUser(userId));
        }
    }

    // Build the training model, honoring a custom builder when one was supplied.
    DataModel trainingModel = dataModelBuilder == null
            ? new GenericDataModel(trainingPrefsByUser)
            : dataModelBuilder.buildDataModel(trainingPrefsByUser);

    Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);

    double result = getEvaluation(testPrefsByUser, recommender);
    log.info("Evaluation result: {}", result);
    return result;
}

From source file:norbert.mynemo.core.evaluation.PersonnalRecommenderEvaluator.java

License:Apache License

/**
 * Evaluates the recommender built by the given builder against the target user's preferences.
 * <p>
 * If the exhaustive evaluation is off, the training percentage behaves normally. If the
 * exhaustive evaluation is on, then the training percentage has two different behaviors:
 * <ul>
 * <li>{@code percentage < 0.5}: the percentage behaves normally, the evaluation is not
 * exhaustive.</li>
 * <li>
 * <p>
 * {@code 0.5 <= percentage}: the preferences of the target user are split into several test
 * sets. Each set contains approximately the same number of user preferences. For example, if
 * <code>percentage=1</code>, then the number of sets equals the number of preferences of the
 * target user, each set containing one preference. If <code>percentage=0.5</code>, then two
 * sets are built, each containing half of the preferences. If <code>percentage=0.9</code>,
 * then ten sets are built, each containing one tenth of the preferences. Then, each set is
 * tested as if the evaluation were not exhaustive.
 * </p>
 * <p>
 * Thus, <code>1</code> provides the most precise result, but the computation may be intensive.
 * </p>
 * </li>
 * </ul>
 */
@Override
public double evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder,
        DataModel dataModel, double trainingPercentage, double evaluationPercentage) throws TasteException {

    Timer timer = Timer.createStartedTimer();

    // clear the previously computed errors
    errorStats.clear();
    squaredErrorStats.clear();
    predictionRequestNumber = 0;

    // all training preferences except the target user's one
    FastByIDMap<PreferenceArray> baseTrainingPreferences = buildBaseTrainingPreferences(dataModel,
            evaluationPercentage);

    List<List<Preference>> testSets = buildTestSets(dataModel, trainingPercentage);

    // the idea is to generate a recommendation for each preference of the
    // target user.
    for (List<Preference> currentTestSet : testSets) {
        // add the preferences of the target user
        // (clone so each iteration starts from the shared base preferences)
        FastByIDMap<PreferenceArray> currentTrainingPreferences = baseTrainingPreferences.clone();
        addUserPreferences(dataModel, currentTrainingPreferences, currentTestSet);

        // build the training model, honoring a custom builder when one was supplied
        DataModel currentTrainingModel = (dataModelBuilder == null)
                ? new GenericDataModel(currentTrainingPreferences)
                : dataModelBuilder.buildDataModel(currentTrainingPreferences);

        Recommender currentRecommender = recommenderBuilder.buildRecommender(currentTrainingModel);

        evaluate(currentTrainingModel, currentRecommender, currentTestSet);
    }

    duration = timer.stop().getDuration();

    return getEvaluationSummary(metric);
}

From source file:recommender.GenericRecommenderIRStatsEvaluatorCustom.java

License:Apache License

@Override
public IRStatistics evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder,
        DataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold,
        double evaluationPercentage) throws TasteException {

    prop = new Properties();
    // Load evaluator settings; try-with-resources closes the classpath stream
    // (the original leaked the InputStream returned by getResourceAsStream).
    try (java.io.InputStream settingsStream = GenericRecommenderIRStatsEvaluatorCustom.class
            .getResourceAsStream("settings.properties")) {
        prop.load(settingsStream);
    } catch (IOException ex) {
        java.util.logging.Logger.getLogger(GenericRecommenderIRStatsEvaluatorCustom.class.getName())
                .log(Level.SEVERE, null, ex);
    }

    // load settings from MySQL database
    loadSettings();

    Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    Preconditions.checkArgument(dataModel != null, "dataModel is null");
    Preconditions.checkArgument(at >= 1, "at must be at least 1");
    Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            "Invalid evaluationPercentage: " + evaluationPercentage
                    + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.getNumItems();
    System.out.println("Data model numItems: " + numItems);
    RunningAverage precision = new FullRunningAverage();
    RunningAverage recall = new FullRunningAverage();
    RunningAverage fallOut = new FullRunningAverage();
    RunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    // map to store diversity ranges => number of users (values kept as strings for JSON output)
    HashMap<String, String> map = new HashMap<>();

    LongPrimitiveIterator it = dataModel.getUserIDs();
    while (it.hasNext()) {

        long userID = it.nextLong();

        if (userID == 0) {
            continue;
        }

        // get the top users
        if (!list.contains(userID + "")) {
            continue;
        }

        if (random.nextDouble() >= evaluationPercentage) {
            // Skipped
            continue;
        }

        long start = System.currentTimeMillis();

        PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
        System.out.println("User preferences: " + prefs);

        // List some most-preferred items that would count as (most) "relevant" results
        // NOTE(review): the relevanceThreshold parameter is deliberately ignored here;
        // the original hard-codes 0 — confirm this is intended before changing it.
        double theRelevanceThreshold = 0;//Double.isNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
        FastIDSet relevantItemIDs = dataSplitter.getRelevantItemsIDs(userID, at, theRelevanceThreshold,
                dataModel);
        System.out.println("Relevant items: " + relevantItemIDs);
        System.out.println("Relevance threshold: " + theRelevanceThreshold);

        int numRelevantItems = relevantItemIDs.size();
        if (numRelevantItems <= 0) {
            continue;
        }

        // Training data: every user's preferences, minus this user's relevant items.
        FastByIDMap<PreferenceArray> trainingUsers = new FastByIDMap<>(dataModel.getNumUsers());
        LongPrimitiveIterator it2 = dataModel.getUserIDs();
        while (it2.hasNext()) {
            dataSplitter.processOtherUser(userID, relevantItemIDs, trainingUsers, it2.nextLong(), dataModel);
        }

        DataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
                : dataModelBuilder.buildDataModel(trainingUsers);
        try {
            trainingModel.getPreferencesFromUser(userID);
        } catch (NoSuchUserException nsee) {
            continue; // Oops we excluded all prefs for the user -- just move on
        }

        int size = numRelevantItems + trainingModel.getItemIDsFromUser(userID).size();
        if (size < 2 * at) {
            // Really not enough prefs to meaningfully evaluate this user
            System.out
                    .println("Really not enough prefs (" + size + ") to meaningfully evaluate user: " + userID);
            continue;
        }

        Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);

        // Count recommended items that are relevant (by held-out set or stored preference).
        int intersectionSize = 0;
        List<RecommendedItem> recommendedItems = recommender.recommend(userID, at, rescorer);
        HashMap<Long, Double> user_preferences = getUserPreferencesList(userID);
        for (RecommendedItem recommendedItem : recommendedItems) {
            double preference = isRelevant(user_preferences, recommendedItem);
            System.out.println("Preference: " + preference);
            if (relevantItemIDs.contains(recommendedItem.getItemID()) || preference != 0) {
                intersectionSize++;
            }
        }

        int numRecommendedItems = recommendedItems.size();

        // Precision
        if (numRecommendedItems > 0) {
            precision.addDatum((double) intersectionSize / (double) numRecommendedItems);
            System.out.println(
                    "intersectionSize: " + intersectionSize + " numRecommendedItems: " + numRecommendedItems);
        }

        // Recall
        recall.addDatum((double) intersectionSize / (double) numRelevantItems);

        // Fall-out
        if (numRelevantItems < size) {
            fallOut.addDatum(
                    (double) (numRecommendedItems - intersectionSize) / (double) (numItems - numRelevantItems));
        }

        // nDCG
        // In computing, assume relevant IDs have relevance 1 and others 0
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int i = 0; i < numRecommendedItems; i++) {
            RecommendedItem item = recommendedItems.get(i);
            double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
            if (relevantItemIDs.contains(item.getItemID())) {
                cumulativeGain += discount;
            }
            // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

            // Ideally results would be ordered with all relevant ones first, so this theoretical
            // ideal list starts with number of relevant items equal to the total number of relevant items
            if (i < numRelevantItems) {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0) {
            nDCG.addDatum(cumulativeGain / idealizedGain);
        }

        // Reach
        numUsersRecommendedFor++;
        if (numRecommendedItems > 0) {
            numUsersWithRecommendations++;
        }

        long end = System.currentTimeMillis();

        log.info("Evaluated with user {} in {}ms", userID, end - start);
        log.info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}", precision.getAverage(),
                recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(),
                (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
        System.out.println("Relevant items: " + numRelevantItems);
        System.out.println("Precision: " + precision.getAverage());
        System.out.println("Recall: " + recall.getAverage());
        System.out.println("Fall-out: " + fallOut.getAverage());
        System.out.println("nDCG: " + nDCG.getAverage());
        System.out.println("Reach: " + (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
        double diversity = getDiversity(recommendedItems);
        System.out.println("Diversity: " + diversity);

        // Tally this user's diversity score into its histogram bucket
        // (replaces the original ten-branch if/else chain; same boundaries).
        String bucket = diversityBucket(diversity);
        if (bucket != null) {
            map.merge(bucket, "1", (prev, one) -> String.valueOf(Integer.parseInt(prev) + 1));
        }
    }

    JSONObject json = new JSONObject(map);

    writeFile(prop.getProperty("metrics_file"), json.toJSONString());

    return new IRStatisticsImplCustom(precision.getAverage(), recall.getAverage(), fallOut.getAverage(),
            nDCG.getAverage(), (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
}

/** Lower bounds separating consecutive diversity histogram buckets, ascending. */
private static final double[] DIVERSITY_BOUNDS = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };

/** Bucket labels; index i covers scores below DIVERSITY_BOUNDS[i] (last bucket is open-ended). */
private static final String[] DIVERSITY_LABELS = { "0-0.1", "0.1-0.2", "0.2-0.3", "0.3-0.4",
        "0.4-0.5", "0.5-0.6", "0.6-0.7", "0.7-0.8", "0.8-0.9", "0.9-1" };

/**
 * Maps a diversity score to its histogram bucket label.
 *
 * @param diversity the diversity score of one user's recommendations
 * @return the bucket label, or {@code null} when the score is negative or NaN
 *         (matching the original chain, which binned neither)
 */
private static String diversityBucket(double diversity) {
    // !(x >= 0) is true for both negatives and NaN, exactly mirroring the
    // original comparisons which all evaluate false for NaN.
    if (!(diversity >= 0.0)) {
        return null;
    }
    int idx = 0;
    // Same >= comparisons against the same double literals as the original chain.
    while (idx < DIVERSITY_BOUNDS.length && diversity >= DIVERSITY_BOUNDS[idx]) {
        idx++;
    }
    return DIVERSITY_LABELS[idx];
}

From source file:recsys.evaluator.TopKRecommenderEvaluator.java

License:Apache License

@Override
public Double evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder,
        DataModel dataModel, Double trainingPercentage, Double evaluationPercentage) throws TasteException {
    // Fail fast on invalid arguments.
    Preconditions.checkNotNull(recommenderBuilder);
    Preconditions.checkNotNull(dataModel);
    Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
            "Invalid trainingPercentage: " + trainingPercentage
                    + ". Must be: 0.0 <= trainingPercentage <= 1.0");
    Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
            "Invalid evaluationPercentage: " + evaluationPercentage
                    + ". Must be: 0.0 <= evaluationPercentage <= 1.0");

    log.info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);

    // Pre-size both partitions for the expected number of sampled users.
    int userCount = dataModel.getNumUsers();
    int expectedSampledUsers = 1 + (int) (evaluationPercentage * userCount);
    FastByIDMap<PreferenceArray> trainingSplit = new FastByIDMap<PreferenceArray>(expectedSampledUsers);
    FastByIDMap<PreferenceArray> testSplit = new FastByIDMap<PreferenceArray>(expectedSampledUsers);

    IntPrimitiveIterator userIds = dataModel.getUserIDs();
    while (userIds.hasNext()) {
        Integer userId = userIds.nextInt();
        // Sample a fraction of users; each sampled user's preferences are
        // divided between the training and test partitions.
        if (random.nextDouble() < evaluationPercentage) {
            splitOneUsersPrefs(trainingPercentage, trainingSplit, testSplit, userId, dataModel);
        }
    }

    // Build the training model, honoring a custom builder when one was supplied.
    DataModel trainingModel = dataModelBuilder == null
            ? new GenericDataModel(trainingSplit)
            : dataModelBuilder.buildDataModel(trainingSplit);

    Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);

    Double result = getEvaluation(testSplit, recommender);
    log.info("Evaluation result: {}", result);
    return result;
}