List of usage examples for the method org.apache.mahout.common.RandomUtils#getRandom(long seed)
public static Random getRandom(long seed)
From source file:com.elex.dmp.lda.CachingCVB0Mapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    log.info("Retrieving configuration");
    Configuration config = context.getConfiguration();

    // Model hyper-parameters and job sizing, all read from the job configuration.
    float eta = config.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = config.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = config.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    numTopics = config.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = config.getInt(CVB0Driver.NUM_TERMS, -1);
    int updateThreads = config.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int trainThreads = config.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = config.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = config.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);

    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(config);
    if (modelPaths != null && modelPaths.length > 0) {
        // Later iterations: load the model produced by the previous pass.
        readModel = new TopicModel(config, eta, alpha, null, updateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        // First iteration: seed a fresh topic model from the configured random seed.
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                trainThreads, modelWeight);
    }

    log.info("Initializing write model");
    TopicModel writeModel;
    if (modelWeight == 1) {
        writeModel = new TopicModel(numTopics, numTerms, eta, alpha, null, updateThreads);
    } else {
        // Non-unit model weight: updates are folded back into the read model itself.
        writeModel = readModel;
    }

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, writeModel, trainThreads, numTopics, numTerms);
    modelTrainer.start();
}
From source file:com.elex.dmp.lda.CachingCVB0PerplexityMapper.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { MemoryUtil.startMemoryLogger(5000);//from w w w . j av a 2s . com log.info("Retrieving configuration"); Configuration conf = context.getConfiguration(); float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN); float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN); long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L); random = RandomUtils.getRandom(seed); numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1); int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1); int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1); int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4); maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10); float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f); testFraction = conf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f); log.info("Initializing read model"); TopicModel readModel; Path[] modelPaths = CVB0Driver.getModelPaths(conf); if (modelPaths != null && modelPaths.length > 0) { readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths); } else { log.info("No model files found"); readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null, numTrainThreads, modelWeight); } log.info("Initializing model trainer"); modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms); log.info("Initializing topic vector"); topicVector = new DenseVector(new double[numTopics]); }
From source file:com.predictionmarketing.itemrecommend.CliMF.java
License:Apache License
protected void prepareTraining() throws TasteException { RandomWrapper random = (RandomWrapper) RandomUtils.getRandom(0L); userVectors = new double[dataModel.getNumUsers()][numFeatures]; itemVectors = new double[dataModel.getNumItems()][numFeatures]; LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs(); while (socialuser.hasNext()) { //create social date model idset because user may have no friend ,and it will cause bug long userID = socialuser.nextLong(); SocialIDset.add(userID);//w w w . java2 s. c o m } // double globalAverage = getAveragePreference(); for (int userIndex = 0; userIndex < userVectors.length; userIndex++) { userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { userVectors[userIndex][feature] = random.nextGaussian() * randomNoise; } //unit vectorize userVectors[userIndex] = unitvectorize(userVectors[userIndex]); } for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) { itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise; } //unit vectorize itemVectors[itemIndex] = unitvectorize(itemVectors[itemIndex]); } //compute bias try { computeBias(); } catch (TasteException e) { System.out.println("error in bias computing"); } }
From source file:com.predictionmarketing.itemrecommend.RatingSGDFactorizer.java
License:Apache License
protected void prepareTraining() throws TasteException { RandomWrapper random = (RandomWrapper) RandomUtils.getRandom(0L); userVectors = new double[dataModel.getNumUsers()][numFeatures]; itemVectors = new double[dataModel.getNumItems()][numFeatures]; LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs(); while (socialuser.hasNext()) { //create social date model idset because user may have no friend ,and it will cause bug long userID = socialuser.nextLong(); SocialIDset.add(userID);//w w w . j a va 2s. com } double globalAverage = getAveragePreference(); for (int userIndex = 0; userIndex < userVectors.length; userIndex++) { userVectors[userIndex][0] = globalAverage; userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { userVectors[userIndex][feature] = random.nextGaussian() * randomNoise; } } for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) { itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise; } } //compute bias //user bias double userAverage = 0; double userMax = -Double.MAX_VALUE; double userMin = Double.MAX_VALUE; LongPrimitiveIterator userIDs = dataModel.getUserIDs(); while (userIDs.hasNext()) { long userid = userIDs.nextLong(); double user_sum = 0; for (Preference record : dataModel.getPreferencesFromUser(userid)) { user_sum += record.getValue(); } userAverage += user_sum; userMax = (user_sum > userMax) ? user_sum : userMax; userMin = (user_sum < userMin) ? 
user_sum : userMin; int userindex = userIndex(userid); userVectors[userindex][USER_BIAS_INDEX] = user_sum; } userAverage /= dataModel.getNumUsers(); double min_dist = userAverage - userMin; double max_dist = userMax - userAverage; double normalize = (min_dist > max_dist) ? min_dist : max_dist; userIDs = dataModel.getUserIDs(); while (userIDs.hasNext()) { long userid = userIDs.nextLong(); int userindex = userIndex(userid); userVectors[userindex][USER_BIAS_INDEX] -= userAverage; userVectors[userindex][USER_BIAS_INDEX] /= normalize; } //item bias double itemAverage = 0; double itemMax = -Double.MAX_VALUE; double itemMin = Double.MAX_VALUE; LongPrimitiveIterator itemIDs = dataModel.getItemIDs(); while (itemIDs.hasNext()) { long itemid = itemIDs.nextLong(); double item_sum = 0; for (Preference record : dataModel.getPreferencesForItem(itemid)) { item_sum += record.getValue(); } itemAverage += item_sum; itemMax = (item_sum > itemMax) ? item_sum : itemMax; itemMin = (item_sum < itemMin) ? item_sum : itemMin; int itemindex = itemIndex(itemid); itemVectors[itemindex][ITEM_BIAS_INDEX] = item_sum; } itemAverage /= dataModel.getNumItems(); min_dist = itemAverage - itemMin; max_dist = itemMax - itemAverage; normalize = (min_dist > max_dist) ? min_dist : max_dist; itemIDs = dataModel.getItemIDs(); while (itemIDs.hasNext()) { long itemid = itemIDs.nextLong(); int itemindex = itemIndex(itemid); itemVectors[itemindex][ITEM_BIAS_INDEX] -= itemAverage; itemVectors[itemindex][ITEM_BIAS_INDEX] /= normalize; } cachePreferences(); shufflePreferences(); }
From source file:com.predictionmarketing.itemrecommend.SVDPPFactorizer.java
License:Apache License
protected void prepareTraining() throws TasteException { super.prepareTraining(); Random random = RandomUtils.getRandom(10L); p = new double[dataModel.getNumUsers()][numFeatures]; for (int i = 0; i < p.length; i++) { for (int feature = 0; feature < FEATURE_OFFSET; feature++) { p[i][feature] = 0;//from www .j av a 2s .c o m } for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { p[i][feature] = random.nextGaussian() * randomNoise; } } y = new double[dataModel.getNumItems()][numFeatures]; for (int i = 0; i < y.length; i++) { for (int feature = 0; feature < FEATURE_OFFSET; feature++) { y[i][feature] = 0; } for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) { y[i][feature] = random.nextGaussian() * randomNoise; } } /* get internal item IDs which we will need several times */ itemsByUser = Maps.newHashMap(); LongPrimitiveIterator userIDs = dataModel.getUserIDs(); while (userIDs.hasNext()) { long userId = userIDs.nextLong(); int userIndex = userIndex(userId); FastIDSet itemIDsFromUser = dataModel.getItemIDsFromUser(userId); List<Integer> itemIndexes = Lists.newArrayListWithCapacity(itemIDsFromUser.size()); itemsByUser.put(userIndex, itemIndexes); // cover testuser's reply items if (userId == testuser) { for (long itemID2 : itemIDsFromUser) { if (coveruser != (postwriter.get(itemID2))) { int i2 = itemIndex(itemID2); itemIndexes.add(i2); } } } else { for (long itemID2 : itemIDsFromUser) { int i2 = itemIndex(itemID2); itemIndexes.add(i2); } } } LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs(); while (socialuser.hasNext()) { //create social date model idset because user may have no friend ,and it will cause bug long userID = socialuser.nextLong(); SocialIDset.add(userID); } }