file.PredictionFileReader.java Source code

Java tutorial

Introduction

Here is the source code for file.PredictionFileReader.java

Source

/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald
     
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
     
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
     
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package file;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import com.google.common.base.Functions;
import com.google.common.collect.Lists;

import common.Bookmark;
import common.DoubleMapComparator;
import common.PredictionData;
import common.Utilities;
import file.postprocessing.CatDescFiltering;

/**
 * Reads prediction result files from {@code ./data/results/<filename>.txt} and collects
 * them as {@link PredictionData} entries.
 * <p>
 * Entries accumulate across calls: the internal list is created once in the constructor
 * and every {@code read...} method appends to it. The list may contain {@code null}
 * placeholders for lines that passed the filters but carried no predictions.
 */
public class PredictionFileReader {

    private List<PredictionData> predictions;
    private String filename;
    private int predictionCount;

    /** Creates a reader with an empty prediction list and a prediction count of zero. */
    public PredictionFileReader() {
        this.predictions = new ArrayList<PredictionData>();
        this.predictionCount = 0;
    }

    /**
     * Reads a pipe-separated prediction file.
     * <p>
     * Each line is split on {@code |}. The first field has the form {@code userID[-resID]},
     * the second field holds the real items and the third field the predicted items, both
     * separated by {@code ", "}. A user id that cannot be parsed as an integer is mapped to
     * {@code -1}; a resource id that cannot be parsed aborts the read.
     *
     * @param filename        name of the result file, without directory and {@code .txt} extension
     * @param k               passed through to every created {@link PredictionData}
     * @param wikiReader      bookmark reader handed to {@code Utilities.isEntityEvaluated}
     * @param minBookmarks    lower user bound handed to {@code Utilities.isEntityEvaluated}
     * @param maxBookmarks    upper user bound handed to {@code Utilities.isEntityEvaluated}
     * @param minResBookmarks lower resource bound handed to {@code Utilities.isEntityEvaluated}
     * @param maxResBookmarks upper resource bound handed to {@code Utilities.isEntityEvaluated}
     * @param categorizer     optional additional user filter; may be {@code null}
     * @return {@code true} if the whole file was read, {@code false} if an exception occurred
     *         (entries added before the failure stay in the list)
     */
    public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integer minBookmarks,
            Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks, CatDescFiltering categorizer) {
        this.filename = filename;
        // try-with-resources closes the file even when a line fails to parse
        try (BufferedReader br = openResultFile(filename)) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split("\\|");
                String[] parts = lineParts[0].split("-");

                int userID = -1;
                try {
                    userID = Integer.parseInt(parts[0]);
                } catch (RuntimeException ignored) {
                    // string id (or missing id field) - keep -1
                }
                int resID = -1;
                if (parts.length > 1) {
                    resID = Integer.parseInt(parts[1]);
                }

                if (!Utilities.isEntityEvaluated(wikiReader, userID, minBookmarks, maxBookmarks, false)
                        || !Utilities.isEntityEvaluated(wikiReader, resID, minResBookmarks, maxResBookmarks,
                                true)) {
                    continue; // skip this line if it shouldn't be evaluated - # bookmarks case
                }
                if (categorizer != null && !categorizer.evaluate(userID)) {
                    continue; // skip this user if it shouldn't be evaluated - categorizer case
                }

                if (lineParts.length < 3) {
                    // no prediction field: keep a placeholder so the line still counts in the list size
                    this.predictions.add(null);
                    continue;
                }
                List<String> realData = Arrays.asList(lineParts[1].split(", "));
                List<String> predictionData = Arrays.asList(lineParts[2].split(", "));
                if (predictionData.size() > 0) {
                    this.predictions.add(new PredictionData(userID, resID, realData, predictionData, k));
                    this.predictionCount++;
                } else {
                    this.predictions.add(null);
                }
            }
            if (k == 10) {
                System.out.println("Number of users to predict: " + this.predictions.size());
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Reads a space-separated tensor result file with lines of the form
     * {@code userID resID tagID score}.
     * <p>
     * Consecutive lines with the same user and resource form one group. Each group is
     * paired, in file order, with the next bookmark of the test set (the bookmarks from
     * index {@code trainSize} onwards) whose tags serve as the real data. The group that
     * is still open when the file ends is emitted as well.
     * <p>
     * The bookmark-count bounds and the categorizer are accepted for signature
     * compatibility but are not used by this method.
     *
     * @param filename        name of the result file, without directory and {@code .txt} extension
     * @param k               passed through to every created {@link PredictionData}
     * @param trainSize       index of the first test bookmark in {@code bookmarkReader.getBookmarks()}
     * @param bookmarkReader  source of the test bookmarks
     * @param minBookmarks    unused
     * @param maxBookmarks    unused
     * @param minResBookmarks unused
     * @param maxResBookmarks unused
     * @param categorizer     unused
     * @return {@code true} if the whole file was read, {@code false} if an exception occurred
     *         (entries added before the failure stay in the list)
     */
    public boolean readTensorFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader,
            Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks,
            CatDescFiltering categorizer) {
        this.filename = filename;
        List<Bookmark> testLines = bookmarkReader.getBookmarks().subList(trainSize,
                bookmarkReader.getBookmarks().size());

        try (BufferedReader br = openResultFile(filename)) {
            String line;
            String userID = null, resID = null;
            Map<Integer, Double> tensorTags = new LinkedHashMap<Integer, Double>();
            int count = 0;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split(" ");
                boolean groupOpen = userID != null && resID != null;
                if (groupOpen && (!userID.equals(lineParts[0]) || !resID.equals(lineParts[1]))) {
                    // user or resource changed: the collected tags belong to the previous test line
                    addTensorPrediction(userID, resID, testLines.get(count++).getTags(), tensorTags, k);
                    tensorTags.clear();
                }
                userID = lineParts[0];
                resID = lineParts[1];
                tensorTags.put(Integer.parseInt(lineParts[2]), Double.parseDouble(lineParts[3]));
            }
            // The last group has no following line to trigger the flush above, so emit it here.
            // The bounds check keeps files with more groups than test lines from failing at the end.
            if (userID != null && resID != null && !tensorTags.isEmpty() && count < testLines.size()) {
                addTensorPrediction(userID, resID, testLines.get(count).getTags(), tensorTags, k);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Reads a MyMediaLite result file with lines of the form
     * {@code userID<TAB>[item:score,item:score,...]}.
     * <p>
     * Lines are skipped when the user id is not an integer, when the user is not part of
     * the test set, or when the user is rejected by {@code Utilities.isEntityEvaluated} or
     * by the categorizer. For every remaining line the real data is the list of resources
     * of that user in the test set; the predictions are the item ids in front of each
     * {@code :}. A line without any usable prediction adds a {@code null} placeholder.
     * The resource id of every created {@link PredictionData} is {@code -1}.
     * <p>
     * The resource bounds {@code minResBookmarks}/{@code maxResBookmarks} are accepted for
     * signature compatibility but are not used by this method.
     *
     * @param filename        name of the result file, without directory and {@code .txt} extension
     * @param k               passed through to every created {@link PredictionData}
     * @param trainSize       handed to the bookmark reader to determine the test users and their resources
     * @param bookmarkReader  source of test users, their resources and the resource names
     * @param minBookmarks    lower user bound handed to {@code Utilities.isEntityEvaluated}
     * @param maxBookmarks    upper user bound handed to {@code Utilities.isEntityEvaluated}
     * @param minResBookmarks unused
     * @param maxResBookmarks unused
     * @param categorizer     optional additional user filter; may be {@code null}
     * @return {@code true} if the whole file was read, {@code false} if an exception occurred
     *         (entries added before the failure stay in the list)
     */
    public boolean readMyMediaLiteFile(String filename, int k, int trainSize, BookmarkReader bookmarkReader,
            Integer minBookmarks, Integer maxBookmarks, Integer minResBookmarks, Integer maxResBookmarks,
            CatDescFiltering categorizer) {
        this.filename = filename;
        try (BufferedReader br = openResultFile(filename)) {
            // a hash set turns the per-line membership test into a constant-time lookup
            Set<Integer> testUsers = new HashSet<Integer>(bookmarkReader.getUniqueUserListFromTestSet(trainSize));
            Map<Integer, List<Integer>> resourcesOfTestUsers = bookmarkReader.getResourcesOfTestUsers(trainSize);
            final int resID = -1; // this format carries no resource id

            String line;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split("\\t");
                if (lineParts.length == 0) {
                    continue; // skip invalid line
                }

                int userID;
                try {
                    userID = Integer.parseInt(lineParts[0]);
                } catch (NumberFormatException ignored) {
                    continue; // skip user if userid is invalid
                }

                if (!testUsers.contains(userID)) {
                    continue; // skip user if it is not part of the test-set
                }
                if (!Utilities.isEntityEvaluated(bookmarkReader, userID, minBookmarks, maxBookmarks, false)) {
                    continue; // skip this user if it shouldn't be evaluated - # bookmarks case
                }
                if (categorizer != null && !categorizer.evaluate(userID)) {
                    continue; // skip this user if it shouldn't be evaluated - categorizer case
                }

                List<String> realData = new ArrayList<String>();
                List<Integer> testResources = resourcesOfTestUsers.get(userID);
                if (testResources != null) { // a user without a resource entry simply has no real data
                    for (int testRes : testResources) {
                        realData.add(bookmarkReader.getResources().get(testRes));
                    }
                }

                List<String> predictionData = new ArrayList<String>();
                if (lineParts.length > 1) {
                    String recommendationString = lineParts[1].replace("[", "").replace("]", "");
                    for (String predictionString : recommendationString.split(",")) {
                        int separator = predictionString.indexOf(':');
                        // an empty list "[]" yields one empty token without ':'; skip such tokens
                        if (separator >= 0) {
                            predictionData.add(predictionString.substring(0, separator));
                        }
                    }
                }

                if (predictionData.isEmpty()) {
                    // line does not have predictions: keep a placeholder
                    this.predictions.add(null);
                } else {
                    this.predictions.add(new PredictionData(userID, resID, realData, predictionData, k));
                    this.predictionCount++;
                }
            }
            if (k == 1) {
                System.out.println("Number of users to predict: " + this.predictions.size());
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    // Helper ------------------------------------------------------------------------------------------------

    /** Opens {@code ./data/results/<filename>.txt} as a UTF-8 buffered reader; the caller closes it. */
    private static BufferedReader openResultFile(String filename) throws IOException {
        File resultFile = new File("./data/results/" + filename + ".txt");
        return new BufferedReader(new InputStreamReader(new FileInputStream(resultFile), StandardCharsets.UTF_8));
    }

    /**
     * Turns one collected tensor group into a {@link PredictionData} entry and appends it.
     * The tag ids are copied in the order produced by {@code DoubleMapComparator}
     * (presumably ranked by score - confirm against that class), so the caller may clear
     * {@code tensorTags} afterwards.
     */
    private void addTensorPrediction(String userID, String resID, List<Integer> realData,
            Map<Integer, Double> tensorTags, int k) {
        Map<Integer, Double> sortedTensorTags = new TreeMap<Integer, Double>(new DoubleMapComparator(tensorTags));
        sortedTensorTags.putAll(tensorTags);
        List<Integer> predictionData = new ArrayList<Integer>(sortedTensorTags.keySet());

        PredictionData data = new PredictionData(Integer.parseInt(userID), Integer.parseInt(resID),
                Lists.transform(realData, Functions.toStringFunction()),
                Lists.transform(predictionData, Functions.toStringFunction()), k);
        this.predictions.add(data);
        this.predictionCount++;
    }

    // Getter ------------------------------------------------------------------------------------------------

    /** Returns the internal (live) list of read predictions; it may contain {@code null} placeholders. */
    public List<PredictionData> getPredictionData() {
        return this.predictions;
    }

    /** Returns the filename passed to the most recent {@code read...} call, or {@code null} if none was made. */
    public String getFilename() {
        return this.filename;
    }

    /** Returns the number of non-null {@link PredictionData} entries added so far. */
    public int getPredictionCount() {
        return this.predictionCount;
    }
}