Example usage for org.apache.mahout.common RandomUtils getRandom

Introduction

On this page you can find example usage of org.apache.mahout.common RandomUtils getRandom.

Prototype

public static Random getRandom(long seed) 
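
Before the usage examples, here is a minimal, self-contained sketch of the seeded overload. The only assumption beyond the prototype above is the usual contract of a seeded generator: the same seed yields the same sequence, which is what makes Mahout jobs and tests reproducible.

import java.util.Random;

import org.apache.mahout.common.RandomUtils;

public class GetRandomDemo {
    public static void main(String[] args) {
        // Two generators built from the same seed produce identical sequences.
        Random a = RandomUtils.getRandom(42L);
        Random b = RandomUtils.getRandom(42L);
        System.out.println(a.nextInt() == b.nextInt()); // true
        System.out.println(a.nextGaussian() == b.nextGaussian()); // true
    }
}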

Usage

From source file: com.elex.dmp.lda.CachingCVB0Mapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    log.info("Retrieving configuration");
    Configuration conf = context.getConfiguration();
    float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1);
    int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);

    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(conf);
    if (modelPaths != null && modelPaths.length > 0) {
        readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                numTrainThreads, modelWeight);
    }

    log.info("Initializing write model");
    TopicModel writeModel = modelWeight == 1
            ? new TopicModel(numTopics, numTerms, eta, alpha, null, numUpdateThreads)
            : readModel;

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, writeModel, numTrainThreads, numTopics, numTerms);
    modelTrainer.start();
}
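
The mapper above reads its seed from the job configuration with a default of 1234L, so every mapper builds an identically initialized TopicModel. As a hedged sketch, the driver side would set the same key before submitting the job (assuming CVB0Driver.RANDOM_SEED is the plain String configuration key used with conf.getLong above):

Configuration conf = new Configuration();
// Any fixed value works; what matters is that all mappers share it,
// so their random topic initializations agree across tasks and reruns.
conf.setLong(CVB0Driver.RANDOM_SEED, 1234L);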

From source file: com.elex.dmp.lda.CachingCVB0PerplexityMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    MemoryUtil.startMemoryLogger(5000); // log memory usage every 5000 ms

    log.info("Retrieving configuration");
    Configuration conf = context.getConfiguration();
    float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    random = RandomUtils.getRandom(seed);
    numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1);
    int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);
    testFraction = conf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f);

    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(conf);
    if (modelPaths != null && modelPaths.length > 0) {
        readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                numTrainThreads, modelWeight);
    }

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms);

    log.info("Initializing topic vector");
    topicVector = new DenseVector(new double[numTopics]);
}
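
Unlike the previous mapper, this one also keeps the seeded generator in the random field, which Mahout uses to sample the held-out test fraction of documents. A hedged sketch of that pattern (the field names mirror the example; the exact line in Mahout's map() may differ):

// With a fixed seed, every run of the job holds out the same documents.
if (testFraction < 1.0f && random.nextDouble() >= testFraction) {
    return; // document is not part of the held-out set
}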

From source file: com.predictionmarketing.itemrecommend.CliMF.java

License: Apache License

protected void prepareTraining() throws TasteException {
    RandomWrapper random = (RandomWrapper) RandomUtils.getRandom(0L);
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];
    LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs();
    while (socialuser.hasNext()) { // build the social data model's user-ID set; a user may have no friends, which would otherwise cause a bug
        long userID = socialuser.nextLong();
        SocialIDset.add(userID);
    }
    //        double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * randomNoise;
        }
        //unit vectorize
        userVectors[userIndex] = unitvectorize(userVectors[userIndex]);
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise;
        }
        //unit vectorize
        itemVectors[itemIndex] = unitvectorize(itemVectors[itemIndex]);
    }

    //compute bias
    try {
        computeBias();
    } catch (TasteException e) {
        System.out.println("error in bias computing");
    }
}
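
The unchecked cast above relies on RandomUtils.getRandom(long) returning a RandomWrapper instance behind its Random return type, which is consistent with this example compiling and running. The Gaussian feature initialization is easy to isolate; a minimal sketch with an illustrative noise level (the factorizer receives randomNoise as a constructor parameter):

Random random = RandomUtils.getRandom(0L);
double randomNoise = 0.02; // illustrative value only
double[] features = new double[10];
for (int i = 0; i < features.length; i++) {
    // small zero-mean perturbations break symmetry between features
    features[i] = random.nextGaussian() * randomNoise;
}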

From source file: com.predictionmarketing.itemrecommend.RatingSGDFactorizer.java

License: Apache License

protected void prepareTraining() throws TasteException {
    RandomWrapper random = (RandomWrapper) RandomUtils.getRandom(0L);
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];
    LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs();
    while (socialuser.hasNext()) { // build the social data model's user-ID set; a user may have no friends, which would otherwise cause a bug
        long userID = socialuser.nextLong();
        SocialIDset.add(userID);
    }
    double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][0] = globalAverage;
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    //compute bias
    //user bias
    double userAverage = 0;
    double userMax = -Double.MAX_VALUE;
    double userMin = Double.MAX_VALUE;
    LongPrimitiveIterator userIDs = dataModel.getUserIDs();
    while (userIDs.hasNext()) {
        long userid = userIDs.nextLong();
        double user_sum = 0;
        for (Preference record : dataModel.getPreferencesFromUser(userid)) {
            user_sum += record.getValue();
        }
        userAverage += user_sum;
        userMax = (user_sum > userMax) ? user_sum : userMax;
        userMin = (user_sum < userMin) ? user_sum : userMin;
        int userindex = userIndex(userid);
        userVectors[userindex][USER_BIAS_INDEX] = user_sum;
    }
    userAverage /= dataModel.getNumUsers();
    double min_dist = userAverage - userMin;
    double max_dist = userMax - userAverage;
    double normalize = (min_dist > max_dist) ? min_dist : max_dist;
    userIDs = dataModel.getUserIDs();
    while (userIDs.hasNext()) {
        long userid = userIDs.nextLong();
        int userindex = userIndex(userid);
        userVectors[userindex][USER_BIAS_INDEX] -= userAverage;
        userVectors[userindex][USER_BIAS_INDEX] /= normalize;
    }
    //item bias
    double itemAverage = 0;
    double itemMax = -Double.MAX_VALUE;
    double itemMin = Double.MAX_VALUE;
    LongPrimitiveIterator itemIDs = dataModel.getItemIDs();
    while (itemIDs.hasNext()) {
        long itemid = itemIDs.nextLong();
        double item_sum = 0;
        for (Preference record : dataModel.getPreferencesForItem(itemid)) {
            item_sum += record.getValue();
        }
        itemAverage += item_sum;
        itemMax = (item_sum > itemMax) ? item_sum : itemMax;
        itemMin = (item_sum < itemMin) ? item_sum : itemMin;
        int itemindex = itemIndex(itemid);
        itemVectors[itemindex][ITEM_BIAS_INDEX] = item_sum;
    }
    itemAverage /= dataModel.getNumItems();
    min_dist = itemAverage - itemMin;
    max_dist = itemMax - itemAverage;
    normalize = (min_dist > max_dist) ? min_dist : max_dist;
    itemIDs = dataModel.getItemIDs();
    while (itemIDs.hasNext()) {
        long itemid = itemIDs.nextLong();
        int itemindex = itemIndex(itemid);
        itemVectors[itemindex][ITEM_BIAS_INDEX] -= itemAverage;
        itemVectors[itemindex][ITEM_BIAS_INDEX] /= normalize;
    }

    cachePreferences();
    shufflePreferences();
}
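
The bias passes above center each user's (or item's) preference sum on the mean and divide by the larger one-sided spread, so every stored bias lands in [-1, 1]. A hypothetical helper distilling that arithmetic (not part of the original class; it assumes the sums are not all identical, otherwise the scale would be zero):

/** Mirrors the user/item bias normalization above; outputs lie in [-1, 1]. */
static double[] normalizeBiases(double[] sums) {
    double avg = 0;
    double max = -Double.MAX_VALUE;
    double min = Double.MAX_VALUE;
    for (double s : sums) {
        avg += s;
        max = Math.max(max, s);
        min = Math.min(min, s);
    }
    avg /= sums.length;
    double scale = Math.max(avg - min, max - avg);
    double[] normalized = new double[sums.length];
    for (int i = 0; i < sums.length; i++) {
        normalized[i] = (sums[i] - avg) / scale;
    }
    return normalized;
}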

From source file: com.predictionmarketing.itemrecommend.SVDPPFactorizer.java

License: Apache License

protected void prepareTraining() throws TasteException {
    super.prepareTraining();
    Random random = RandomUtils.getRandom(10L);
    p = new double[dataModel.getNumUsers()][numFeatures];
    for (int i = 0; i < p.length; i++) {
        for (int feature = 0; feature < FEATURE_OFFSET; feature++) {
            p[i][feature] = 0;
        }
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            p[i][feature] = random.nextGaussian() * randomNoise;
        }
    }

    y = new double[dataModel.getNumItems()][numFeatures];
    for (int i = 0; i < y.length; i++) {
        for (int feature = 0; feature < FEATURE_OFFSET; feature++) {
            y[i][feature] = 0;
        }
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            y[i][feature] = random.nextGaussian() * randomNoise;
        }
    }

    /* get internal item IDs which we will need several times */
    itemsByUser = Maps.newHashMap();
    LongPrimitiveIterator userIDs = dataModel.getUserIDs();
    while (userIDs.hasNext()) {
        long userId = userIDs.nextLong();
        int userIndex = userIndex(userId);
        FastIDSet itemIDsFromUser = dataModel.getItemIDsFromUser(userId);
        List<Integer> itemIndexes = Lists.newArrayListWithCapacity(itemIDsFromUser.size());
        itemsByUser.put(userIndex, itemIndexes);
        // for the test user, skip items written by coveruser
        if (userId == testuser) {
            for (long itemID2 : itemIDsFromUser) {
                if (coveruser != (postwriter.get(itemID2))) {
                    int i2 = itemIndex(itemID2);
                    itemIndexes.add(i2);
                }
            }
        } else {
            for (long itemID2 : itemIDsFromUser) {
                int i2 = itemIndex(itemID2);
                itemIndexes.add(i2);
            }
        }
    }

    LongPrimitiveIterator socialuser = SocialdataModel.getUserIDs();
    while (socialuser.hasNext()) { // build the social data model's user-ID set; a user may have no friends, which would otherwise cause a bug
        long userID = socialuser.nextLong();
        SocialIDset.add(userID);
    }
}