List of usage examples for the org.apache.mahout.cf.taste.impl.recommender GenericRecommendedItem constructor
public GenericRecommendedItem(long itemID, float value)
From source file:CollaborativeRecommender.java
License:Apache License
/**
 * Creates the merged list of collaborative-filtering recommendations for the current user.
 *
 * Gets recommendation lists from ItemRecommender and UserbasedRecommender, averages the score of
 * items present in both, appends items unique to either list, and inserts the top results into the
 * database. Already-rated items are excluded; depending on the value of the field "add", items
 * already shown at the front end are excluded too. One deliberately "disliked" (least recommended)
 * story is injected at a random ranking between 5 and 10.
 *
 * @return the size of the full list of possible recommendations
 * @throws TasteException thrown if there is an exception from Mahout
 */
public int runCollaborativeRecommender() throws TasteException {
    /* Per-recommender result lists */
    ArrayList<CollaborativeRecommendation> itembased = new ArrayList<CollaborativeRecommendation>();
    ArrayList<CollaborativeRecommendation> userbased = new ArrayList<CollaborativeRecommendation>();
    /* Items found in both lists; removed from the per-recommender lists after merging */
    ArrayList<CollaborativeRecommendation> itemremoved = new ArrayList<CollaborativeRecommendation>();
    ArrayList<CollaborativeRecommendation> userremoved = new ArrayList<CollaborativeRecommendation>();
    /* Both item-based and user-based results are collected into this list */
    ArrayList<CollaborativeRecommendation> collaborativeRecommendations = new ArrayList<CollaborativeRecommendation>();

    /* Database setup */
    DatabaseConnection db = new DatabaseConnection("collaborative_view");
    db.setConnection();
    db.setDataModel();
    DataModel model = db.getDataModel();

    /* Run the item- and user-based recommenders */
    ItemRecommender IR = new ItemRecommender(userId);
    itembased = IR.RunItemRecommender(model);
    UserbasedRecommender UR = new UserbasedRecommender(userId);
    userbased = UR.RunUserbasedRecommender(model);

    /* Merge: when both recommenders suggest the same item, average the two scores */
    for (CollaborativeRecommendation itemrecommendation : itembased) {
        float average_recommender_value = 0;
        for (CollaborativeRecommendation userrecommendation : userbased) {
            if (itemrecommendation.getItem().getItemID() == userrecommendation.getItem().getItemID()) {
                average_recommender_value = (itemrecommendation.getItem().getValue()
                        + userrecommendation.getItem().getValue()) / 2;
                /* Add to the merged list and mark the entry for removal from both source lists */
                collaborativeRecommendations.add(new CollaborativeRecommendation(
                        new GenericRecommendedItem(itemrecommendation.getItem().getItemID(),
                                average_recommender_value),
                        itemrecommendation.getUserId(), "item and user based"));
                itemremoved.add(itemrecommendation);
                userremoved.add(userrecommendation);
            }
        }
    }
    /* Remove the duplicates that were merged above */
    for (CollaborativeRecommendation recommendation : itemremoved) {
        itembased.remove(recommendation);
    }
    for (CollaborativeRecommendation recommendation : userremoved) {
        userbased.remove(recommendation);
    }
    /* Add results unique to each list */
    for (CollaborativeRecommendation recommendation : itembased) {
        collaborativeRecommendations.add(recommendation);
    }
    for (CollaborativeRecommendation recommendation : userbased) {
        collaborativeRecommendations.add(recommendation);
    }
    /* Sort the final results list */
    Collections.sort(collaborativeRecommendations, new CompareCollaborative());

    /* Stories the user has already rated (story id -> rating) */
    HashMap<Integer, Integer> ratedStories = db.getRated((int) userId);
    ArrayList<Integer> frontendStories = new ArrayList<>();
    /* Stories already present in the front-end recommendation list must not be recommended again */
    if (add.equals("true")) {
        frontendStories = db.getStoriesInFrontendArray((int) userId);
    }

    /* Take the top 10 recommendations and prepare to insert them into the database */
    ArrayList<DatabaseInsertObject> itemsToBeInserted = new ArrayList<>();
    ArrayList<Long> idsToBeInserted = new ArrayList<>();
    int ranking = 1;
    Random rand = new Random();
    /* Ranking slot (5..10) at which the single "false" recommendation is injected */
    int randomDislikedRanking = rand.nextInt(6) + 5;
    for (CollaborativeRecommendation recommendation : collaborativeRecommendations) {
        /* To get a story outside of the user's preferences, find the least recommended story */
        if (randomDislikedRanking == ranking) {
            /* Walk backwards from the tail of the list; skip stories already at the front end,
               already selected in this run, or already rated (may happen if the user has few
               unseen/unrated stories left) */
            for (int i = 1; i < collaborativeRecommendations.size(); i++) {
                long dislikedStoryId = collaborativeRecommendations.get(collaborativeRecommendations.size() - i)
                        .getItem().getItemID();
                if (!frontendStories.contains((int) dislikedStoryId) && !idsToBeInserted.contains(dislikedStoryId)
                        && ratedStories.get((int) dislikedStoryId) == null) {
                    itemsToBeInserted.add(new DatabaseInsertObject((int) userId, "DF." + dislikedStoryId,
                            "FalseRecommendation", 1, 0, ranking, collaborativeRecommendations
                                    .get(collaborativeRecommendations.size() - i).getItem().getValue()));
                    idsToBeInserted.add(dislikedStoryId);
                    System.out.print("False recommend: ");
                    System.out.println(dislikedStoryId);
                    break;
                }
            }
            ranking++;
            if (ranking > 10) {
                break;
            }
            continue;
        }
        /* Insert the item only if it is unrated, not already at the front end,
           and not already selected (e.g. as the false recommendation) */
        if ((ratedStories.get((int) recommendation.getItem().getItemID()) == null)
                && !frontendStories.contains((int) recommendation.getItem().getItemID())
                && !idsToBeInserted.contains(recommendation.getItem().getItemID())) {
            if (recommendation.getExplanation().equals("item")) {
                /* Get the 30 items that had most influence on the recommendation */
                List<RecommendedItem> becauseItems = IR.getRecommender().recommendedBecause(userId,
                        recommendation.getItem().getItemID(), 30);
                int counter = 1;
                ArrayList<RecommendedItem> explanationItems = new ArrayList<>();
                for (RecommendedItem because : becauseItems) {
                    /* Add a story to the explanation only if it was rated, and rated well (> 2);
                       stop after three explanation items */
                    if (!explanationItems.contains(because) && ratedStories.get((int) because.getItemID()) != null
                            && ratedStories.get((int) because.getItemID()) > 2) {
                        explanationItems.add(because);
                        counter++;
                    }
                    if (counter > 3) {
                        break;
                    }
                }
                String explanation = db.createExplanation(explanationItems);
                itemsToBeInserted.add(new DatabaseInsertObject((int) this.userId,
                        "DF." + recommendation.getItem().getItemID(), explanation, 0, 1, ranking,
                        recommendation.getItem().getValue()));
                idsToBeInserted.add(recommendation.getItem().getItemID());
                System.out.println(recommendation.getItem());
                ranking++;
            } else {
                itemsToBeInserted.add(new DatabaseInsertObject((int) this.userId,
                        "DF." + recommendation.getItem().getItemID(), recommendation.getExplanation(), 0, 1,
                        ranking, recommendation.getItem().getValue()));
                /* FIX: track this id as well; previously only the "item" branch recorded it, so the
                   false-recommendation pass and the duplicate guard above could re-insert the story */
                idsToBeInserted.add(recommendation.getItem().getItemID());
                System.out.println(recommendation.getItem());
                ranking++;
            }
            if (ranking > 10) {
                break;
            }
        }
    }
    /* Keep the full candidate list on the model */
    this.recommendations = collaborativeRecommendations;
    /* Delete the current recommendations stored in stored_story that the user has not seen */
    db.deleteRecommendations((int) userId);
    /* Insert new recommendations into the database */
    db.insertUpdateRecommendValues(itemsToBeInserted);
    /* Close connection */
    db.closeConnection();
    /* Return number of recommendations possible */
    return collaborativeRecommendations.size();
}
From source file:DatabaseConnectionTest.java
License:Apache License
/** * Testing if createExplanation return the expected string *///from ww w.ja v a 2s . com @Test public void createExplanationTest() { /*Create some recommendedItems to use in the explanation*/ ArrayList<RecommendedItem> explanationItems = new ArrayList<>(); RecommendedItem item1 = new GenericRecommendedItem(1098, 0); RecommendedItem item2 = new GenericRecommendedItem(1115, 0); RecommendedItem item3 = new GenericRecommendedItem(1501, 0); explanationItems.add(item1); explanationItems.add(item2); explanationItems.add(item3); /*Get the actual string produced by the method*/ String actualString = connection.createExplanation(explanationItems); /*The result we are expecting*/ String expectedString = "DF.1098:Legeliv i Trondhjem,DF.1115:Evig eies kun det teipte,DF.1501:Dr. Pinnebergs jul - et arkivmysterium i to akter"; assertEquals(expectedString, actualString); }
From source file:cn.edu.bjtu.cit.recommender.Recommender.java
License:Apache License
/**
 * Entry point of the Crunch item-based co-occurrence recommender pipeline.
 *
 * Builds a nine-stage MapReduce pipeline (S0..S9): parse prefs, build user vectors, filter
 * low-activity users, count item co-occurrences, join co-occurrence columns with user prefs,
 * compute partial recommendation vectors, and finally emit top-N recommended items per user.
 * Between stages, results are optionally wrapped by the profiler.
 *
 * @param args command-line arguments: input path, output path, then optional options
 * @return 0 on success, 1 on usage error or pipeline failure
 * @throws Exception propagated from option parsing or pipeline execution
 */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName()
                + " [generic options] input output [profiling] [estimation] [clustersize]");
        System.err.println();
        printUsage();
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }
    OptionParser parser = new OptionParser(args);
    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
    // Configure the pipeline from command-line options; each option is independent.
    if (parser.hasOption(CLUSTER_SIZE)) {
        pipeline.getConfiguration().setInt(ClusterOracle.CLUSTER_SIZE,
                Integer.parseInt(parser.getOption(CLUSTER_SIZE).getValue()));
    }
    if (parser.hasOption(PROFILING)) {
        pipeline.getConfiguration().setBoolean(Profiler.IS_PROFILE, true);
        this.profileFilePath = parser.getOption(PROFILING).getValue();
    }
    if (parser.hasOption(ESTIMATION)) {
        estFile = parser.getOption(ESTIMATION).getValue();
        est = new Estimator(estFile, clusterSize);
    }
    if (parser.hasOption(OPT_REDUCE)) {
        pipeline.getConfiguration().setBoolean(OPT_REDUCE, true);
    }
    if (parser.hasOption(OPT_MSCR)) {
        pipeline.getConfiguration().setBoolean(OPT_MSCR, true);
    }
    if (parser.hasOption(ACTIVE_THRESHOLD)) {
        threshold = Integer.parseInt(parser.getOption("at").getValue());
    }
    if (parser.hasOption(TOP)) {
        top = Integer.parseInt(parser.getOption("top").getValue());
    }
    profiler = new Profiler(pipeline);
    /*
     * Input node. When profiling, large inputs are down-sampled to 10%.
     */
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    if (profiler.isProfiling() && lines.getSize() > 10 * 1024 * 1024) {
        lines = lines.sample(0.1);
    }
    /*
     * S0 + GBK: parse "userID<DELM>itemID" lines and group item IDs by user.
     */
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {
        @Override
        public Pair<Long, Long> map(String input) {
            String[] split = input.split(Estimator.DELM);
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
        }

        @Override
        public float scaleFactor() {
            return est.getScaleFactor("S0").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
            return est.getScaleFactor("S0").recsFactor;
        }
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey(est.getClusterSize());
    /*
     * S1: build a sparse boolean preference vector per user (1.0 at each preferred item index).
     */
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                @Override
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        userVector.set((int) itemPref, 1.0f);
                    }
                    return Pair.of(input.first(), userVector);
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S1").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S1").recsFactor;
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));
    userVector = profiler.profile("S0-S1", pipeline, userVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));
    /*
     * S2: keep only users with more than `threshold` preferences.
     */
    PTable<Long, Vector> filteredUserVector = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, Vector>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, Vector>> emitter) {
                    if (input.second().getNumNondefaultElements() > threshold) {
                        emitter.emit(input);
                    }
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S2").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S2").recsFactor;
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));
    filteredUserVector = profiler.profile("S2", pipeline, filteredUserVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));
    /*
     * S3 + GBK: emit every (item, item) pair co-occurring in a user vector
     * (including self-pairs), grouped by the first item index.
     */
    PGroupedTable<Integer, Integer> coOccurencePairs = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
                        }
                    }
                }

                @Override
                public float scaleFactor() {
                    float size = est.getScaleFactor("S3").sizeFactor;
                    return size;
                }

                @Override
                public float scaleFactorByRecord() {
                    float recs = est.getScaleFactor("S3").recsFactor;
                    return recs;
                }
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey(est.getClusterSize());
    /*
     * S4: count the pairs into one co-occurrence row vector per item.
     */
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                @Override
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    }
                    return Pair.of(input.first(), cooccurrenceRow);
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S4").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S4").recsFactor;
                }
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));
    coOccurenceVector = profiler.profile("S3-S4", pipeline, coOccurenceVector, ProfileConverter.int_vector(),
            Writables.tableOf(Writables.ints(), Writables.vectors()));
    /*
     * S5: wrap co-occurrence columns as VectorOrPref so they can be unioned with
     * the per-item user preferences from S6.
     */
    PTable<Integer, VectorOrPref> wrappedCooccurrence = coOccurenceVector
            .parallelDo(new MapFn<Pair<Integer, Vector>, Pair<Integer, VectorOrPref>>() {
                @Override
                public Pair<Integer, VectorOrPref> map(Pair<Integer, Vector> input) {
                    return Pair.of(input.first(), new VectorOrPref(input.second()));
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S5").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S5").recsFactor;
                }
            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
    wrappedCooccurrence = profiler.profile("S5", pipeline, wrappedCooccurrence, ProfileConverter.int_vopv(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
    /*
     * S6: split each user vector into per-item (userID, preference) entries, keyed by item index.
     */
    PTable<Integer, VectorOrPref> userVectorSplit = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, VectorOrPref>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, VectorOrPref>> emitter) {
                    long userID = input.first();
                    Vector userVector = input.second();
                    Iterator<Vector.Element> it = userVector.iterateNonZero();
                    while (it.hasNext()) {
                        Vector.Element e = it.next();
                        int itemIndex = e.index();
                        float preferenceValue = (float) e.get();
                        emitter.emit(Pair.of(itemIndex, new VectorOrPref(userID, preferenceValue)));
                    }
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S6").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S6").recsFactor;
                }
            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
    userVectorSplit = profiler.profile("S6", pipeline, userVectorSplit, ProfileConverter.int_vopp(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
    /*
     * S7: union S5 and S6 and combine each item's co-occurrence column with the
     * users/preference values that reference it. Long.MIN_VALUE and NaN mark the
     * "vector" variant of VectorOrPref and are skipped as user/pref entries.
     */
    PTable<Integer, VectorAndPrefs> combinedVectorOrPref = wrappedCooccurrence.union(userVectorSplit)
            .groupByKey(est.getClusterSize())
            .parallelDo(new DoFn<Pair<Integer, Iterable<VectorOrPref>>, Pair<Integer, VectorAndPrefs>>() {
                @Override
                public void process(Pair<Integer, Iterable<VectorOrPref>> input,
                        Emitter<Pair<Integer, VectorAndPrefs>> emitter) {
                    Vector vector = null;
                    List<Long> userIDs = Lists.newArrayList();
                    List<Float> values = Lists.newArrayList();
                    for (VectorOrPref vop : input.second()) {
                        if (vector == null) {
                            vector = vop.getVector();
                        }
                        long userID = vop.getUserID();
                        if (userID != Long.MIN_VALUE) {
                            userIDs.add(vop.getUserID());
                        }
                        float value = vop.getValue();
                        if (!Float.isNaN(value)) {
                            values.add(vop.getValue());
                        }
                    }
                    emitter.emit(Pair.of(input.first(), new VectorAndPrefs(vector, userIDs, values)));
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S7").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S7").recsFactor;
                }
            }, Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));
    combinedVectorOrPref = profiler.profile("S5+S6-S7", pipeline, combinedVectorOrPref,
            ProfileConverter.int_vap(), Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));
    /*
     * S8 + combine: multiply each co-occurrence column by each user's preference and
     * sum the partial products per user into one recommendation vector.
     */
    PTable<Long, Vector> partialMultiply = combinedVectorOrPref
            .parallelDo(new DoFn<Pair<Integer, VectorAndPrefs>, Pair<Long, Vector>>() {
                @Override
                public void process(Pair<Integer, VectorAndPrefs> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector cooccurrenceColumn = input.second().getVector();
                    List<Long> userIDs = input.second().getUserIDs();
                    List<Float> prefValues = input.second().getValues();
                    for (int i = 0; i < userIDs.size(); i++) {
                        long userID = userIDs.get(i);
                        if (userID != Long.MIN_VALUE) {
                            float prefValue = prefValues.get(i);
                            Vector partialProduct = cooccurrenceColumn.times(prefValue);
                            emitter.emit(Pair.of(userID, partialProduct));
                        }
                    }
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S8").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S8").recsFactor;
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors())).groupByKey(est.getClusterSize())
            .combineValues(new CombineFn<Long, Vector>() {
                @Override
                public void process(Pair<Long, Iterable<Vector>> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector partial = null;
                    for (Vector vector : input.second()) {
                        partial = partial == null ? vector : partial.plus(vector);
                    }
                    emitter.emit(Pair.of(input.first(), partial));
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("combine").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("combine").recsFactor;
                }
            });
    partialMultiply = profiler.profile("S8-combine", pipeline, partialMultiply, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));
    /*
     * S9: keep the `top` highest-scoring items per user via a bounded priority queue
     * and emit them sorted by preference value.
     */
    PTable<Long, RecommendedItems> recommendedItems = partialMultiply
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, RecommendedItems>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, RecommendedItems>> emitter) {
                    Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(11,
                            Collections.reverseOrder(BY_PREFERENCE_VALUE));
                    Iterator<Vector.Element> recommendationVectorIterator = input.second().iterateNonZero();
                    while (recommendationVectorIterator.hasNext()) {
                        Vector.Element element = recommendationVectorIterator.next();
                        int index = element.index();
                        float value = (float) element.get();
                        if (topItems.size() < top) {
                            topItems.add(new GenericRecommendedItem(index, value));
                        } else if (value > topItems.peek().getValue()) {
                            topItems.add(new GenericRecommendedItem(index, value));
                            topItems.poll();
                        }
                    }
                    List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
                    recommendations.addAll(topItems);
                    Collections.sort(recommendations, BY_PREFERENCE_VALUE);
                    emitter.emit(Pair.of(input.first(), new RecommendedItems(recommendations)));
                }

                @Override
                public float scaleFactor() {
                    return est.getScaleFactor("S9").sizeFactor;
                }

                @Override
                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S9").recsFactor;
                }
            }, Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));
    recommendedItems = profiler.profile("S9", pipeline, recommendedItems, ProfileConverter.long_ri(),
            Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));
    /*
     * Profiling run: write profile data and stop before producing output.
     */
    if (profiler.isProfiling()) {
        profiler.writeResultToFile(profileFilePath);
        profiler.cleanup(pipeline.getConfiguration());
        return 0;
    }
    /*
     * asText: write final recommendations and run the pipeline.
     */
    pipeline.writeTextFile(recommendedItems, args[1]);
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
}
From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java
License:Apache License
public TopK<RecommendedItem> buildRecommends(Vector userRatings, Vector userFeatures, int topK) { final Map<Integer, Boolean> alreadyRated = VectorUtils.keys(userRatings); final TopK<RecommendedItem> topKItems = new TopK<RecommendedItem>(topK, VectorUtils.BY_PREFERENCE_VALUE); Iterator<MatrixSlice> rows = Y.iterator(); while (rows.hasNext()) { MatrixSlice row = rows.next();//from w w w. j av a 2s . com int itemId = row.index(); Vector itemFeatures = row.vector(); if (!alreadyRated.containsKey(itemId)) { double predictedRating = userFeatures.dot(itemFeatures); topKItems.offer(new GenericRecommendedItem(itemId, (float) predictedRating)); } } return topKItems; }
From source file:net.myrrix.client.ClientRecommender.java
License:Apache License
private static List<RecommendedItem> consumeItems(HttpURLConnection connection) throws IOException { List<RecommendedItem> result = Lists.newArrayList(); BufferedReader reader = IOUtils.bufferStream(connection.getInputStream()); try {//w w w. j a v a2s . c o m String line; while ((line = reader.readLine()) != null) { Iterator<String> tokens = COMMA.split(line).iterator(); long itemID = Long.parseLong(tokens.next()); float value = LangUtils.parseFloat(tokens.next()); result.add(new GenericRecommendedItem(itemID, value)); } } finally { Closeables.close(reader, true); } return result; }
From source file:net.myrrix.common.TopNTest.java
License:Apache License
private static List<RecommendedItem> makeNCandidates(int n) { List<RecommendedItem> candidates = Lists.newArrayListWithCapacity(n); for (int i = 1; i <= n; i++) { candidates.add(new GenericRecommendedItem(i, i)); }//from www .ja v a 2s . c o m return candidates; }
From source file:net.myrrix.online.eval.AbstractEvaluator.java
License:Apache License
/**
 * Reads all CSV data files in dataDir, sampling users and items, and partitions the
 * parsed lines into plain preference data, item tags, and user tags.
 *
 * IDs that start with a double quote are treated as tags rather than numeric IDs; a line
 * may carry a user tag or an item tag but not both. A missing value column means an
 * implicit value of 1.0; an empty value column marks a "remove" line and is skipped.
 * When a RescorerProvider is given, plain preference values are rescored through it.
 *
 * @param dataDir              directory containing .csv / .csv.zip / .csv.gz files
 * @param evaluationPercentage approximate fraction of all data to keep
 * @param provider             optional rescorer provider; may be null
 * @return the sampled data, item tags, and user tags bundled in a DataFileContents
 * @throws IOException if a data file cannot be read
 */
private static DataFileContents readDataFile(File dataDir, double evaluationPercentage,
        RescorerProvider provider) throws IOException {
    // evaluationPercentage filters per user and item, not per datum, since time scales with users and
    // items. We select sqrt(evaluationPercentage) of users and items to overall select about
    // evaluationPercentage of all data.
    int perMillion = (int) (1000000 * FastMath.sqrt(evaluationPercentage));
    Multimap<Long, RecommendedItem> data = ArrayListMultimap.create();
    Multimap<String, RecommendedItem> itemTags = ArrayListMultimap.create();
    Multimap<String, RecommendedItem> userTags = ArrayListMultimap.create();
    for (File dataFile : dataDir.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
        log.info("Reading {}", dataFile);
        int count = 0;
        for (CharSequence line : new FileLineIterable(dataFile)) {
            Iterator<String> parts = COMMA_TAB_SPLIT.split(line).iterator();
            String userIDString = parts.next();
            // Deterministic sampling by ID hash: keep roughly perMillion/1e6 of users and items
            if (userIDString.hashCode() % 1000000 <= perMillion) {
                String itemIDString = parts.next();
                if (itemIDString.hashCode() % 1000000 <= perMillion) {
                    Long userID = null;
                    // A leading double quote marks a tag instead of a numeric ID
                    boolean userIsTag = userIDString.startsWith("\"");
                    if (!userIsTag) {
                        userID = Long.valueOf(userIDString);
                    }
                    boolean itemIsTag = itemIDString.startsWith("\"");
                    Long itemID = null;
                    if (!itemIsTag) {
                        itemID = Long.valueOf(itemIDString);
                    }
                    Preconditions.checkArgument(!(userIsTag && itemIsTag),
                            "Can't have a user tag and item tag in one line");
                    if (parts.hasNext()) {
                        String token = parts.next().trim();
                        if (!token.isEmpty()) {
                            float value = LangUtils.parseFloat(token);
                            if (userIsTag) {
                                itemTags.put(userIDString, new GenericRecommendedItem(itemID, value));
                            } else if (itemIsTag) {
                                userTags.put(itemIDString, new GenericRecommendedItem(userID, value));
                            } else {
                                // Plain preference: rescore if a provider is configured
                                if (provider != null) {
                                    IDRescorer rescorer = provider.getRecommendRescorer(new long[] { userID },
                                            (MyrrixRecommender) null);
                                    if (rescorer != null) {
                                        value = (float) rescorer.rescore(itemID, value);
                                    }
                                }
                                data.put(userID, new GenericRecommendedItem(itemID, value));
                            }
                        }
                        // Empty value column marks a "remove" line; ignore it
                    } else {
                        // No value column: implicit value of 1.0
                        if (userIsTag) {
                            itemTags.put(userIDString, new GenericRecommendedItem(itemID, 1.0f));
                        } else if (itemIsTag) {
                            userTags.put(itemIDString, new GenericRecommendedItem(userID, 1.0f));
                        } else {
                            float value = 1.0f;
                            if (provider != null) {
                                IDRescorer rescorer = provider.getRecommendRescorer(new long[] { userID },
                                        (MyrrixRecommender) null);
                                if (rescorer != null) {
                                    value = (float) rescorer.rescore(itemID, value);
                                }
                            }
                            data.put(userID, new GenericRecommendedItem(itemID, value));
                        }
                    }
                }
            }
            if (++count % 1000000 == 0) {
                log.info("Finished {} lines", count);
            }
        }
    }
    return new DataFileContents(data, itemTags, userTags);
}
From source file:net.myrrix.online.eval.ReconstructionEvaluator.java
License:Apache License
private static Multimap<Long, RecommendedItem> readAndCopyDataFiles(File dataDir, File tempDir) throws IOException { Multimap<Long, RecommendedItem> data = ArrayListMultimap.create(); for (File dataFile : dataDir.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) { log.info("Reading {}", dataFile); int count = 0; for (CharSequence line : new FileLineIterable(dataFile)) { Iterator<String> parts = COMMA_TAB_SPLIT.split(line).iterator(); long userID = Long.parseLong(parts.next()); long itemID = Long.parseLong(parts.next()); if (parts.hasNext()) { String token = parts.next().trim(); if (!token.isEmpty()) { data.put(userID, new GenericRecommendedItem(itemID, LangUtils.parseFloat(token))); }/*from w w w .j av a2 s.co m*/ // Ignore remove lines } else { data.put(userID, new GenericRecommendedItem(itemID, 1.0f)); } if (++count % 1000000 == 0) { log.info("Finished {} lines", count); } } Files.copy(dataFile, new File(tempDir, dataFile.getName())); } return data; }
From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java
License:Apache License
/** * find the top entries in recommendationVector, map them to the real itemIDs and write back the result */// w w w .ja v a 2 s .c om private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context) throws IOException, InterruptedException { Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1, Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance())); Iterator<Vector.Element> recommendationVectorIterator = recommendationVector.iterateNonZero(); while (recommendationVectorIterator.hasNext()) { Vector.Element element = recommendationVectorIterator.next(); int index = element.index(); long itemID = indexItemIDMap.get(index); if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) { float value = (float) element.get(); if (!Float.isNaN(value)) { if (topItems.size() < recommendationsPerUser) { topItems.add(new GenericRecommendedItem(itemID, value)); } else if (value > topItems.peek().getValue()) { topItems.add(new GenericRecommendedItem(itemID, value)); topItems.poll(); } } } } if (!topItems.isEmpty()) { List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size()); recommendations.addAll(topItems); Collections.sort(recommendations, ByValueRecommendedItemComparator.getInstance()); context.write(userID, new RecommendedItemsWritable(recommendations)); } }
From source file:org.plista.kornakapi.core.recommender.ArrayTopItems.java
License:Apache License
/**
 * Estimates a preference for each candidate item in possibleItemIDs[fromIndex, toIndex)
 * and returns the howMany highest-scoring items, sorted by descending preference value.
 *
 * Items filtered by the rescorer, items the estimator does not know (NoSuchItem/NoSuchUser),
 * and NaN rescored values are skipped. Once the queue is full, only candidates scoring above
 * the current minimum are considered.
 *
 * @param howMany         maximum number of items to return
 * @param possibleItemIDs candidate item IDs; must not be null
 * @param fromIndex       inclusive start index into possibleItemIDs
 * @param toIndex         exclusive end index into possibleItemIDs
 * @param rescorer        optional filter/rescorer; may be null
 * @param estimator       produces the raw preference estimate per item; must not be null
 * @return up to howMany items ordered best-first, or an empty list if none qualify
 * @throws TasteException propagated from the estimator or rescorer
 */
public static List<RecommendedItem> getTopItems(int howMany, long[] possibleItemIDs, int fromIndex, int toIndex,
        IDRescorer rescorer, TopItems.Estimator<Long> estimator) throws TasteException {
    Preconditions.checkArgument(possibleItemIDs != null, "possibleItemIDs is null");
    Preconditions.checkArgument(estimator != null, "estimator is null");
    // Min-heap of the best items seen so far; head is the current worst of the top set
    Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(howMany + 1,
            Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance()));
    boolean full = false;
    double lowestTopValue = Double.NEGATIVE_INFINITY;
    for (int index = fromIndex; index < toIndex; index++) {
        long itemID = possibleItemIDs[index];
        if (rescorer == null || !rescorer.isFiltered(itemID)) {
            double preference;
            try {
                preference = estimator.estimate(itemID);
            } catch (NoSuchItemException nsie) {
                continue;
            } catch (NoSuchUserException nsue) {
                continue;
            }
            double rescoredPref = rescorer == null ? preference : rescorer.rescore(itemID, preference);
            // Only consider the candidate before the queue is full, or when it beats the current minimum
            if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > lowestTopValue)) {
                topItems.add(new GenericRecommendedItem(itemID, (float) rescoredPref));
                if (full) {
                    // Queue already at capacity: evict the new minimum
                    topItems.poll();
                } else if (topItems.size() > howMany) {
                    // Capacity just exceeded for the first time: switch to eviction mode
                    full = true;
                    topItems.poll();
                }
                lowestTopValue = topItems.peek().getValue();
            }
        }
    }
    int size = topItems.size();
    if (size == 0) {
        return Collections.emptyList();
    }
    List<RecommendedItem> result = Lists.newArrayListWithCapacity(size);
    result.addAll(topItems);
    Collections.sort(result, ByValueRecommendedItemComparator.getInstance());
    return result;
}