processing.MPurCalculator.java Source code

Java tutorial

Introduction

Here is the source code for processing.MPurCalculator.java

Source

/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald
     
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
     
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
     
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package processing;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import file.PredictionFileWriter;
import file.BookmarkReader;
import common.DoubleMapComparator;
import common.Bookmark;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;

/**
 * Scores candidates for a (user, resource) pair from two sets of per-entity
 * count maps: one indexed by user id and one indexed by resource id.
 * <p>
 * Each count map is turned into a softmax distribution
 * ({@code exp(count) / sum(exp(counts))}); the user-side distribution is
 * weighted by {@code beta}, the resource-side distribution by
 * {@code 1 - beta}, and the two are summed per key.
 * <p>
 * NOTE(review): the maps are produced by {@code Utilities.getUserMaps} /
 * {@code Utilities.getResMaps}; they presumably map tag id to usage count --
 * confirm against those helpers.
 * <p>
 * The static entry points share the mutable static {@code timeString}; this
 * class makes no attempt to synchronize access to it.
 */
public class MPurCalculator {

    /** Maximum number of entries returned by a sorted ranking. */
    private final static int REC_LIMIT = 10;

    private final BookmarkReader reader;
    /** Weight of the user-side distribution; the resource side gets {@code 1 - beta}. */
    private final double beta;
    private final boolean userBased;
    private final boolean resBased;

    /** Per-user count maps, indexed by user id; {@code null} unless {@code userBased}. */
    private List<Map<Integer, Integer>> userMaps;
    /** {@code log(sum(exp(count)))} for each entry of {@link #userMaps}. */
    private List<Double> userLogDenoms;
    /** Per-resource count maps, indexed by resource id; {@code null} unless {@code resBased}. */
    private List<Map<Integer, Integer>> resMaps;
    /** {@code log(sum(exp(count)))} for each entry of {@link #resMaps}. */
    private List<Double> resLogDenoms;

    /**
     * Builds the model from the first {@code trainSize} bookmarks of the reader.
     *
     * @param reader    source of the bookmarks
     * @param trainSize number of leading bookmarks used for training
     * @param beta      user-side weight in tenths (e.g. {@code 5} means {@code 0.5})
     * @param userBased whether to build and use the per-user maps
     * @param resBased  whether to build and use the per-resource maps
     */
    public MPurCalculator(BookmarkReader reader, int trainSize, int beta, boolean userBased, boolean resBased) {
        this.reader = reader;
        this.beta = (double) beta / 10.0;
        this.userBased = userBased;
        this.resBased = resBased;

        List<Bookmark> trainList = this.reader.getBookmarks().subList(0, trainSize);
        if (this.userBased) {
            this.userMaps = Utilities.getUserMaps(trainList);
            this.userLogDenoms = getLogDenoms(this.userMaps);
        }
        if (this.resBased) {
            this.resMaps = Utilities.getResMaps(trainList);
            this.resLogDenoms = getLogDenoms(this.resMaps);
        }
    }

    /**
     * Computes the log of the softmax denominator for every map in the list.
     *
     * @param maps count maps, one per entity
     * @return list of the same length holding {@code log(sum(exp(count)))} per map
     */
    private static List<Double> getLogDenoms(List<Map<Integer, Integer>> maps) {
        List<Double> logDenoms = new ArrayList<Double>(maps.size());
        for (Map<Integer, Integer> map : maps) {
            logDenoms.add(logSumExp(map));
        }
        return logDenoms;
    }

    /**
     * Returns {@code log(sum(exp(count)))} over the values of the map.
     * <p>
     * The maximum is factored out before exponentiating so that large counts
     * (above roughly 709) do not overflow {@code Math.exp} to infinity, which
     * would otherwise yield {@code Infinity / Infinity = NaN} scores.
     *
     * @param map counts of a single entity
     * @return the log-sum-exp of the counts, or {@code 0.0} for an empty map
     *         (the value is never read in that case because there is nothing
     *         to score)
     */
    private static double logSumExp(Map<Integer, Integer> map) {
        if (map.isEmpty()) {
            return 0.0;
        }
        int max = Integer.MIN_VALUE;
        for (Integer count : map.values()) {
            max = Math.max(max, count.intValue());
        }
        double shiftedSum = 0.0;
        for (Integer count : map.values()) {
            // count - max <= 0, so every term lies in (0, 1]
            shiftedSum += Math.exp(count.doubleValue() - max);
        }
        return max + Math.log(shiftedSum);
    }

    /**
     * Adds {@code weight * softmax(count)} for every entry of {@code counts}
     * to {@code target}, summing with any score already stored under the key.
     *
     * @param target   score map that is updated in place
     * @param counts   counts of a single entity
     * @param logDenom log-sum-exp of {@code counts}
     * @param weight   factor applied to each softmax probability
     */
    private static void addWeightedScores(Map<Integer, Double> target, Map<Integer, Integer> counts,
            double logDenom, double weight) {
        for (Map.Entry<Integer, Integer> entry : counts.entrySet()) {
            // exp(count - logDenom) == exp(count) / sum(exp(counts)), without overflow
            double score = weight * Math.exp(entry.getValue().doubleValue() - logDenom);
            Double existing = target.get(entry.getKey());
            target.put(entry.getKey(), existing == null ? score : existing.doubleValue() + score);
        }
    }

    /**
     * Scores all keys known for the given user and/or resource.
     * <p>
     * Ids that are not smaller than the size of the corresponding map list
     * (i.e. entities not covered by the model) contribute nothing.
     *
     * @param userID  index into the per-user maps
     * @param resID   index into the per-resource maps
     * @param sorting if {@code true}, the result is ordered by
     *                {@code DoubleMapComparator} and truncated to
     *                {@link #REC_LIMIT} entries; otherwise all scores are
     *                returned in insertion order (user keys first)
     * @return map from key to combined score; possibly empty, never {@code null}
     */
    public Map<Integer, Double> getRankedTagList(int userID, int resID, boolean sorting) {
        Map<Integer, Double> resultMap = new LinkedHashMap<Integer, Double>();
        if (this.userBased && this.userMaps != null && userID < this.userMaps.size()) {
            addWeightedScores(resultMap, this.userMaps.get(userID),
                    this.userLogDenoms.get(userID).doubleValue(), this.beta);
        }
        if (this.resBased && this.resMaps != null && resID < this.resMaps.size()) {
            addWeightedScores(resultMap, this.resMaps.get(resID),
                    this.resLogDenoms.get(resID).doubleValue(), 1.0 - this.beta);
        }

        if (!sorting) {
            return resultMap;
        }

        // The comparator orders keys by the scores held in resultMap.
        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resultMap));
        sortedResultMap.putAll(resultMap);

        Map<Integer, Double> returnMap = new LinkedHashMap<Integer, Double>(REC_LIMIT);
        for (Map.Entry<Integer, Double> entry : sortedResultMap.entrySet()) {
            if (returnMap.size() >= REC_LIMIT) {
                break;
            }
            returnMap.put(entry.getKey(), entry.getValue());
        }
        return returnMap;
    }

    // Statics -----------------------------------------------------------------------------------------

    /** Measurement text built up by {@code PerformanceMeasurement}; shared by the static entry points. */
    private static String timeString;

    /**
     * Trains a calculator on the leading part of the reader's bookmarks and
     * scores the trailing {@code sampleSize} bookmarks.
     * <p>
     * If {@code sampleSize} is {@code 0}, the model is trained on all
     * bookmarks and every bookmark is scored. Training and scoring times are
     * recorded in {@code timeString}.
     *
     * @param reader     source of the bookmarks
     * @param sampleSize number of trailing bookmarks to score
     * @param sorting    forwarded to {@link #getRankedTagList(int, int, boolean)}
     * @param userBased  whether to use the per-user maps
     * @param resBased   whether to use the per-resource maps
     * @param beta       user-side weight in tenths
     * @return one score map per scored bookmark, in bookmark order
     */
    public static List<Map<Integer, Double>> startLanguageModelCreation(BookmarkReader reader, int sampleSize,
            boolean sorting, boolean userBased, boolean resBased, int beta) {
        List<Bookmark> bookmarks = reader.getBookmarks();
        int size = bookmarks.size();
        int trainSize = size - sampleSize;

        Stopwatch timer = new Stopwatch();
        timer.start();
        MPurCalculator calculator = new MPurCalculator(reader, trainSize, beta, userBased, resBased);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();
        // No held-out sample: score every bookmark the model was trained on.
        int testStart = (trainSize == size) ? 0 : trainSize;

        timer.reset();
        timer.start();
        for (int i = testStart; i < size; i++) { // the test-set
            Bookmark data = bookmarks.get(i);
            results.add(calculator.getRankedTagList(data.getUserID(), data.getResourceID(), sorting));
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime,
                sampleSize);
        return results;
    }

    /**
     * Reads the bookmarks of {@code filename}, computes sorted rankings for
     * the sample, and writes the predictions and the collected measurements.
     * <p>
     * Predictions go to {@code filename + suffix + beta} via
     * {@code PredictionFileWriter}, where the suffix is {@code "_mp_ur_"},
     * {@code "_mp_u_"} or {@code "_mp_r_"} depending on which sides are
     * enabled; measurements go to
     * {@code "./data/metrics/" + outputFile + "_TIME.txt"}.
     * <p>
     * NOTE(review): the test lines set on the reader start at
     * {@code trainSize}, while the rankings are computed for the last
     * {@code sampleSize} bookmarks; callers presumably pass values for which
     * both ranges coincide -- confirm.
     *
     * @param filename   name passed to {@code BookmarkReader.readFile} and used as output prefix
     * @param trainSize  number of leading bookmarks regarded as training data
     * @param sampleSize number of trailing bookmarks to predict for
     * @param userBased  whether to use the per-user maps
     * @param resBased   whether to use the per-resource maps
     * @param beta       user-side weight in tenths
     * @return the reader holding the bookmarks and the test lines
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased,
            boolean resBased, int beta) {
        Timer timerThread = new Timer();
        MemoryThread memoryThread = new MemoryThread();
        timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);

        // The Timer owns a non-daemon thread; always cancel it, even when
        // reading, predicting or writing fails, so the JVM can terminate.
        try {
            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<Map<Integer, Double>> modelValues = startLanguageModelCreation(reader, sampleSize, true,
                    userBased, resBased, beta);

            List<int[]> predictionValues = new ArrayList<int[]>(modelValues.size());
            for (Map<Integer, Double> modelVal : modelValues) {
                predictionValues.add(Ints.toArray(modelVal.keySet()));
            }

            String suffix;
            if (!userBased) {
                suffix = "_mp_r_";
            } else if (!resBased) {
                suffix = "_mp_u_";
            } else {
                suffix = "_mp_ur_";
            }

            List<Bookmark> allBookmarks = reader.getBookmarks();
            reader.setTestLines(allBookmarks.subList(trainSize, allBookmarks.size()));
            PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
            String outputFile = filename + suffix + beta;
            writer.writeFile(outputFile);

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false,
                    memoryThread.getMaxMemory());
            Utilities.writeStringToFile("./data/metrics/" + outputFile + "_TIME.txt", timeString);
            return reader;
        } finally {
            timerThread.cancel();
        }
    }
}