processing.BLLCalculator.java Source code

Java tutorial

Introduction

Here is the source code for processing.BLLCalculator.java

Source

/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald
     
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
     
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
     
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package processing;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import common.CalculationType;
import common.CooccurenceMatrix;
import common.DoubleMapComparator;
import common.Bookmark;
import common.MapUtil;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;

/**
 * Tag recommender based on time-decayed tag activations ("BLL"; presumably the base-level
 * learning equation - confirm against the project documentation).
 *
 * <p>For every user and every resource of the training split an activation map
 * (tag ID -> score) is pre-computed by {@link #getArtifactMaps}. At query time
 * {@link #getRankedTagList} mixes the user scores (weight {@code beta}) with the resource
 * scores (weight {@code 1 - beta}) and, depending on the {@link CalculationType}, adds an
 * associative component obtained from a {@link CooccurenceMatrix}.
 *
 * <p>The static entry point {@link #predictSample} keeps its timing report in a shared static
 * field, so concurrent calls are not supported.
 */
public class BLLCalculator {

    /** Maximum number of tags returned by {@link #getRankedTagList} when sorting is requested. */
    private final static int REC_LIMIT = 10;

    private final BookmarkReader reader;
    /** Decay exponent applied to the recency of a user's tag assignment. */
    private final double dVal;
    /** Weight of the user-based scores; resource-based scores are weighted with {@code 1 - beta}. */
    private final double beta;
    private final boolean userBased;
    private final boolean resBased;

    /** Normalized activation map per user ID; an entry is {@code null} if the user has no baseline timestamp. */
    private final List<Map<Integer, Double>> userMaps;
    private final List<Map<Integer, Double>> userCounts;
    private final List<Double> userDenoms;
    private final List<Long> userTimestamps;

    /** Normalized activation map per resource ID. */
    private final List<Map<Integer, Double>> resMaps;
    private final List<Map<Integer, Double>> resCounts;
    private final List<Double> resDenoms;
    private final List<Long> resTimestamps;

    private final List<Bookmark> trainList;

    /** Co-occurrence matrix for the associative component; {@code null} when built with {@code CalculationType.NONE}. */
    private final CooccurenceMatrix rMatrix;

    /**
     * Pre-computes all user and resource activation maps from the training split.
     *
     * @param reader    source of the bookmarks; the first {@code trainSize} bookmarks form the
     *                  training split, the remaining ones the test split
     * @param trainSize number of leading bookmarks used for training
     * @param dVal      decay exponent for user activations
     * @param beta      user weight in tenths; it is divided by 10 to obtain the actual weight
     * @param userBased whether user-based scores contribute to the ranking
     * @param resBased  whether resource-based scores contribute to the ranking
     * @param cType     unless this is {@code CalculationType.NONE} the co-occurrence matrix is built
     * @param lambda    optional exponential cut-off factor for user activations, may be {@code null}
     */
    public BLLCalculator(BookmarkReader reader, int trainSize, double dVal, int beta, boolean userBased,
            boolean resBased, CalculationType cType, Double lambda) {
        this.reader = reader;
        this.dVal = dVal;
        this.beta = (double) beta / 10.0;
        this.userBased = userBased;
        this.resBased = resBased;

        List<Bookmark> bookmarks = this.reader.getBookmarks();
        this.trainList = bookmarks.subList(0, trainSize);
        List<Bookmark> testList = bookmarks.subList(trainSize, bookmarks.size());

        // Both kinds of maps are always built, independent of userBased/resBased, because the
        // associative calculation types need the counterpart's data as well.
        this.userDenoms = new ArrayList<Double>();
        this.userTimestamps = new ArrayList<Long>();
        this.userMaps = getArtifactMaps(reader, this.trainList, testList, false, this.userTimestamps,
                this.userDenoms, this.dVal, true, lambda);
        this.userCounts = Utilities.getRelativeTagMaps(this.trainList, false);
        this.resCounts = Utilities.getRelativeTagMaps(this.trainList, true);
        this.rMatrix = cType != CalculationType.NONE
                ? new CooccurenceMatrix(this.trainList, reader.getTagCounts(), true)
                : null;

        this.resDenoms = new ArrayList<Double>();
        this.resTimestamps = new ArrayList<Long>();
        this.resMaps = getArtifactMaps(reader, this.trainList, testList, true, this.resTimestamps, this.resDenoms,
                this.dVal, true, null);
    }

    /**
     * Scores tags for the given user/resource pair.
     *
     * <p>The user part (only if this calculator is user-based and the user ID is known) consists of
     * the user's activation map plus, for the {@code USER_TO_RESOURCE*} and {@code BOTH} types, the
     * associative component; it is weighted with {@code beta}. The resource part is built the same
     * way with the {@code RESOURCE_TO_USER*} and {@code BOTH} types and weighted with
     * {@code 1 - beta}. Whenever an associative component was added, the respective part is
     * re-normalized to sum to 1 before weighting.
     *
     * @param userID  ID of the target user
     * @param resID   ID of the target resource
     * @param sorting if {@code true} only the top {@value #REC_LIMIT} tags are returned, ordered by
     *                {@link DoubleMapComparator}; otherwise all scored tags in insertion order
     * @param cType   which associative components to add
     * @return map from tag ID to score, never {@code null}
     * @throws IllegalStateException if {@code cType} requires the co-occurrence matrix but the
     *                               calculator was created with {@code CalculationType.NONE}
     */
    public Map<Integer, Double> getRankedTagList(int userID, int resID, boolean sorting, CalculationType cType) {
        Map<Integer, Double> resultMap = new LinkedHashMap<Integer, Double>();

        if (this.userBased && this.userMaps != null && userID < this.userMaps.size()) {
            Map<Integer, Double> userResultMap = new LinkedHashMap<Integer, Double>();
            // The map is null for users without a baseline timestamp; such users simply have no
            // base-level scores.
            Map<Integer, Double> userMap = this.userMaps.get(userID);
            if (userMap != null && cType != CalculationType.USER_TO_RESOURCE_ONLY) {
                addWeighted(userResultMap, userMap, 1.0);
            }

            boolean userToResource = cType == CalculationType.USER_TO_RESOURCE_ONLY
                    || cType == CalculationType.USER_TO_RESOURCE || cType == CalculationType.BOTH;
            if (userToResource && resID < this.resMaps.size()) {
                Map<Integer, Double> associativeValues = requireMatrix(cType)
                        .calculateAssociativeComponentsWithTagAssosiation(this.userCounts.get(userID),
                                this.resCounts.get(resID), false, true, false);
                addWeighted(userResultMap, associativeValues, 1.0);
                normalizeInPlace(userResultMap);
            }
            addWeighted(resultMap, userResultMap, this.beta);
        }

        if (this.resBased) {
            Map<Integer, Double> resResultMap = new LinkedHashMap<Integer, Double>();
            if (this.resMaps != null && resID < this.resMaps.size()) {
                Map<Integer, Double> resMap = this.resMaps.get(resID);
                if (resMap != null && cType != CalculationType.RESOURCE_TO_USER_ONLY) {
                    addWeighted(resResultMap, resMap, 1.0);
                }

                boolean resourceToUser = cType == CalculationType.RESOURCE_TO_USER_ONLY
                        || cType == CalculationType.RESOURCE_TO_USER || cType == CalculationType.BOTH;
                if (resourceToUser && userID < this.userMaps.size()) {
                    Map<Integer, Double> associativeValues = requireMatrix(cType)
                            .calculateAssociativeComponentsWithTagAssosiation(this.resCounts.get(resID),
                                    this.userCounts.get(userID), false, false, true);
                    addWeighted(resResultMap, associativeValues, 1.0);
                    normalizeInPlace(resResultMap);
                }
            }
            addWeighted(resultMap, resResultMap, 1.0 - this.beta);
        }

        if (!sorting) {
            return resultMap;
        }

        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(resultMap));
        sortedResultMap.putAll(resultMap);

        Map<Integer, Double> returnMap = new LinkedHashMap<Integer, Double>(REC_LIMIT);
        int taken = 0;
        for (Map.Entry<Integer, Double> entry : sortedResultMap.entrySet()) {
            if (taken++ >= REC_LIMIT) {
                break;
            }
            returnMap.put(entry.getKey(), entry.getValue());
        }
        return returnMap;
    }

    /** Returns the co-occurrence matrix or fails with a descriptive error if it was never built. */
    private CooccurenceMatrix requireMatrix(CalculationType cType) {
        if (this.rMatrix == null) {
            throw new IllegalStateException("Calculation type " + cType
                    + " needs the co-occurrence matrix, but this calculator was created with CalculationType.NONE");
        }
        return this.rMatrix;
    }

    /** Adds {@code factor * value} of every source entry to the target, creating missing keys. */
    private static void addWeighted(Map<Integer, Double> target, Map<Integer, Double> source, double factor) {
        for (Map.Entry<Integer, Double> entry : source.entrySet()) {
            double weighted = factor * entry.getValue().doubleValue();
            Double current = target.get(entry.getKey());
            target.put(entry.getKey(), current == null ? weighted : current.doubleValue() + weighted);
        }
    }

    /**
     * Rescales the values so that they sum to 1. A map whose values sum to exactly 0 is left
     * untouched instead of being filled with {@code NaN}.
     */
    private static void normalizeInPlace(Map<Integer, Double> values) {
        // exp(log(x)) is mathematically the identity; the round trip is kept on purpose so that
        // the scores stay numerically identical to those of earlier runs.
        double denom = 0.0;
        for (Map.Entry<Integer, Double> entry : values.entrySet()) {
            denom += Math.exp(Math.log(entry.getValue()));
        }
        if (denom == 0.0) {
            return;
        }
        for (Map.Entry<Integer, Double> entry : values.entrySet()) {
            entry.setValue(Math.exp(Math.log(entry.getValue())) / denom);
        }
    }

    /**
     * Builds one activation map (tag ID -> value) per user or per resource; the list index is the
     * user/resource ID.
     *
     * <p>For resources every tag assignment contributes 1.0 and the baseline timestamp is fixed to
     * 1. For users the baseline timestamp comes from
     * {@code Utilities.getBaselineTimestamp(testLines, id, false)}; if that is -1 the list holds
     * {@code null} for this user. Each assignment contributes {@code recency^(-dVal)}, optionally
     * multiplied by {@code exp(-lambda * recency)}, where
     * {@code recency = baseline - timestamp + 1}.
     *
     * <p>After accumulation every value is replaced by its natural logarithm. With
     * {@code normalize} the values are then mapped to {@code exp(value) / denom}, so that each map
     * sums to 1.
     *
     * <p>NOTE(review): IDs are assumed to show up densely and in ascending order of first
     * appearance (a new ID is appended, never inserted at its index), and {@code timestampList} is
     * assumed to be empty on entry - confirm for all callers. {@code denomList} receives entries
     * for non-null maps only, so its indices differ from the IDs as soon as a {@code null} map
     * exists.
     *
     * @param reader        not used by this method
     * @param userLines     bookmarks to accumulate
     * @param testLines     bookmarks used to look up a user's baseline timestamp
     * @param resource      {@code true} to group by resource ID, {@code false} to group by user ID
     * @param timestampList output: baseline timestamp per ID
     * @param denomList     output: normalization denominator per non-null map
     * @param dVal          decay exponent
     * @param normalize     whether to normalize each map to sum to 1
     * @param lambda        optional exponential cut-off factor, may be {@code null}
     * @return list of activation maps, possibly containing {@code null} elements
     */
    public static List<Map<Integer, Double>> getArtifactMaps(BookmarkReader reader, List<Bookmark> userLines,
            List<Bookmark> testLines, boolean resource, List<Long> timestampList, List<Double> denomList,
            double dVal, boolean normalize, Double lambda) {

        List<Map<Integer, Double>> maps = new ArrayList<Map<Integer, Double>>();
        for (Bookmark data : userLines) {
            int refID = resource ? data.getResourceID() : data.getUserID();

            if (refID >= maps.size()) {
                // First bookmark of this ID: determine its baseline and open a new map.
                long baselineTimestamp = resource ? 1
                        : Utilities.getBaselineTimestamp(testLines, refID, false);
                timestampList.add(baselineTimestamp);
                if (baselineTimestamp != -1) {
                    maps.add(addActValue(data, new LinkedHashMap<Integer, Double>(), baselineTimestamp, resource,
                            dVal, lambda));
                } else {
                    maps.add(null);
                }
            } else {
                long baselineTimestamp = timestampList.get(refID);
                if (baselineTimestamp != -1) {
                    addActValue(data, maps.get(refID), baselineTimestamp, resource, dVal, lambda);
                }
            }
        }

        for (Map<Integer, Double> map : maps) {
            if (map == null) {
                continue;
            }
            double denom = 0.0;
            for (Map.Entry<Integer, Double> entry : map.entrySet()) {
                double logAct = Math.log(entry.getValue());
                denom += Math.exp(logAct);
                entry.setValue(logAct);
            }
            denomList.add(denom);
            if (normalize) {
                for (Map.Entry<Integer, Double> entry : map.entrySet()) {
                    double act = Math.exp(entry.getValue());
                    // A zero denominator means every activation is 0; keep 0 instead of NaN.
                    entry.setValue(denom == 0.0 ? act : act / denom);
                }
            }
        }

        return maps;
    }

    /**
     * Computes all activation maps via {@link #getArtifactMaps} (without lambda cut-off) and
     * returns the one of the given ID in a {@link TreeMap} ordered by {@link DoubleMapComparator}.
     *
     * @return the ordered map, or an empty map if the ID is unknown or has no activation map
     */
    public static Map<Integer, Double> getSortedArtifactMapForUser(int userID, BookmarkReader reader,
            List<Bookmark> userLines, List<Bookmark> testLines, boolean resource, List<Long> timestampList,
            List<Double> denomList, double dVal, boolean normalize) {

        List<Map<Integer, Double>> artifactMaps = getArtifactMaps(reader, userLines, testLines, resource,
                timestampList, denomList, dVal, normalize, null);
        if (artifactMaps != null && userID >= 0 && userID < artifactMaps.size()) {
            Map<Integer, Double> artifactMap = artifactMaps.get(userID);
            // IDs without a baseline timestamp are stored as null.
            if (artifactMap != null) {
                Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(
                        new DoubleMapComparator(artifactMap));
                sortedResultMap.putAll(artifactMap);
                return sortedResultMap;
            }
        }
        return new LinkedHashMap<Integer, Double>();
    }

    /**
     * Computes all activation maps via {@link #getArtifactMaps} (without lambda cut-off), sums
     * them per tag and returns the sums in a {@link TreeMap} ordered by
     * {@link DoubleMapComparator}. IDs without an activation map are skipped.
     */
    public static Map<Integer, Double> getCollectiveArtifactMap(BookmarkReader reader, List<Bookmark> userLines,
            List<Bookmark> testLines, boolean resource, List<Long> timestampList, List<Double> denomList,
            double dVal, boolean normalize) {

        Map<Integer, Double> collectiveArtifactMap = new LinkedHashMap<Integer, Double>();
        List<Map<Integer, Double>> artifactMaps = getArtifactMaps(reader, userLines, testLines, resource,
                timestampList, denomList, dVal, normalize, null);
        for (Map<Integer, Double> map : artifactMaps) {
            if (map != null) {
                addWeighted(collectiveArtifactMap, map, 1.0);
            }
        }

        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(
                new DoubleMapComparator(collectiveArtifactMap));
        sortedResultMap.putAll(collectiveArtifactMap);
        return sortedResultMap;
    }

    /**
     * Adds the activation contributed by one bookmark to every tag of that bookmark.
     *
     * <p>Bookmarks without a timestamp are ignored. If the contribution is infinite or NaN (for
     * example because the bookmark is not older than the baseline) nothing is added and a
     * message is printed for each tag.
     *
     * @return {@code actValues}, for call chaining
     */
    private static Map<Integer, Double> addActValue(Bookmark data, Map<Integer, Double> actValues,
            long baselineTimestamp, boolean resource, double dVal, Double lambda) {
        String timestamp = data.getTimestamp();
        if (timestamp == null || timestamp.isEmpty()) {
            return actValues;
        }

        double newAct;
        if (resource) {
            newAct = 1.0;
        } else {
            double recency = (baselineTimestamp - Long.parseLong(timestamp)) + 1.0;
            newAct = Math.pow(recency, dVal * -1.0);
            if (lambda != null) {
                newAct *= Math.exp(recency * lambda.doubleValue() * -1.0);
            }
        }

        boolean usable = !Double.isInfinite(newAct) && !Double.isNaN(newAct);
        for (Integer tag : data.getTags()) {
            if (usable) {
                Double oldAct = actValues.get(tag);
                actValues.put(tag, oldAct != null ? oldAct.doubleValue() + newAct : newAct);
            } else {
                System.out.println(
                        "BLL error: " + data.getUserID() + "_" + baselineTimestamp + " " + data.getTimestamp());
            }
        }
        return actValues;
    }

    // Statics  -------------------------------------------------------------------------------------------------------------------------------------------------------------------   
    /** Timing/memory report of the most recent run; shared between calls, hence not thread-safe. */
    private static String timeString;

    /**
     * Trains a calculator on all but the last {@code sampleSize} bookmarks and ranks tags for the
     * remaining ones. With a {@code sampleSize} of 0 every bookmark is ranked. Training and test
     * duration are recorded in {@link #timeString}.
     *
     * @return one score map per evaluated bookmark, in bookmark order
     */
    private static List<Map<Integer, Double>> startActCreation(BookmarkReader reader, int sampleSize,
            boolean sorting, boolean userBased, boolean resBased, double dVal, int beta, CalculationType cType,
            Double lambda) {
        List<Bookmark> bookmarks = reader.getBookmarks();
        int size = bookmarks.size();
        int trainSize = size - sampleSize;

        Stopwatch timer = new Stopwatch();
        timer.start();
        BLLCalculator calculator = new BLLCalculator(reader, trainSize, dVal, beta, userBased, resBased, cType,
                lambda);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        // No held-out sample: evaluate on the complete data set instead of on nothing.
        int firstTestIndex = trainSize == size ? 0 : trainSize;
        List<Map<Integer, Double>> results = new ArrayList<Map<Integer, Double>>();

        timer.reset();
        timer.start();
        for (int i = firstTestIndex; i < size; i++) {
            Bookmark data = bookmarks.get(i);
            results.add(calculator.getRankedTagList(data.getUserID(), data.getResourceID(), sorting, cType));
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime,
                sampleSize);
        return results;
    }

    /**
     * Reads the bookmarks of {@code filename}, predicts tags for the last {@code sampleSize}
     * bookmarks and writes the predictions to
     * {@code filename + suffix + "_" + beta + "_" + dVal}. The time and memory report is written
     * to {@code ./data/metrics/<outputfile>_TIME.txt}.
     *
     * <p>The suffix is {@code _bll_c} for the combined approach, {@code _bll_r} if not user-based
     * and {@code _bll} if user-based but not resource-based; {@code _ac} is appended for
     * {@code USER_TO_RESOURCE} and replaces the suffix for {@code USER_TO_RESOURCE_ONLY}.
     *
     * <p>Memory consumption is sampled by a background timer, which is stopped even if the
     * prediction fails.
     *
     * @return the reader holding the loaded bookmarks, with its test lines set to the bookmarks
     *         from index {@code trainSize} on
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased,
            boolean resBased, double dVal, int beta, CalculationType cType, Double lambda) {
        Timer timerThread = new Timer();
        try {
            MemoryThread memoryThread = new MemoryThread();
            timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);

            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<Map<Integer, Double>> actValues = startActCreation(reader, sampleSize, true, userBased, resBased,
                    dVal, beta, cType, lambda);

            List<int[]> predictionValues = new ArrayList<int[]>(actValues.size());
            for (Map<Integer, Double> modelVal : actValues) {
                predictionValues.add(Ints.toArray(modelVal.keySet()));
            }

            String suffix;
            if (!userBased) {
                suffix = "_bll_r";
            } else if (!resBased) {
                suffix = "_bll";
            } else {
                suffix = "_bll_c";
            }
            if (cType == CalculationType.USER_TO_RESOURCE) {
                suffix += "_ac";
            } else if (cType == CalculationType.USER_TO_RESOURCE_ONLY) {
                suffix = "_ac";
            }

            reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
            PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
            String outputfile = filename + suffix + "_" + beta + "_" + dVal;
            writer.writeFile(outputfile);

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false,
                    memoryThread.getMaxMemory());
            Utilities.writeStringToFile("./data/metrics/" + outputfile + "_TIME.txt", timeString);
            return reader;
        } finally {
            // The Timer runs a non-daemon thread; without cancel() a failure would keep the JVM alive.
            timerThread.cancel();
        }
    }
}