Example usage for org.apache.mahout.math SparseRowMatrix SparseRowMatrix

List of usage examples for org.apache.mahout.math SparseRowMatrix SparseRowMatrix

Introduction

On this page you can find example usage for org.apache.mahout.math SparseRowMatrix SparseRowMatrix.

Prototype

public SparseRowMatrix(int rows, int columns) 

Source Link

Document

Construct a matrix of the given cardinality, with rows defaulting to the RandomAccessSparseVector implementation

Usage

From source file:com.elex.dmp.lda.CVB0DocInferenceMapper.java

License:Apache License

/**
 * Runs CVB0 topic inference for a single document and emits its topic distribution.
 *
 * <p>The topic proportions start uniform (1/k per topic) and are refined by
 * repeatedly applying the read-only topic model for a fixed number of iterations.
 * Only the final topic distribution is written out; the per-document model is
 * scratch space for the trainer.
 */
@Override
public void map(Text docId, VectorWritable doc, Context context) throws IOException, InterruptedException {
    int topicCount = getNumTopics();
    // Uniform prior: every topic starts with probability 1/k.
    Vector topicDist = new DenseVector(new double[topicCount]).assign(1.0 / topicCount);
    // Scratch (topics x vocabulary) counts for this document only.
    Matrix docTopicCounts = new SparseRowMatrix(topicCount, doc.get().size());
    ModelTrainer trainer = getModelTrainer();
    int iterations = getMaxIters();
    for (int iter = 0; iter < iterations; iter++) {
        trainer.getReadModel().trainDocTopicModel(doc.get(), topicDist, docTopicCounts);
    }
    context.write(docId, new VectorWritable(topicDist));
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

/**
 * Reads a serialized naive Bayes model ("naiveBayesModel.bin") from the given
 * output directory and reconstructs it.
 *
 * <p>Binary layout: alphaI (float), feature-weight vector, label-weight vector,
 * per-label theta normalizer vector, then one weight vector per label.
 *
 * @param output directory containing {@code naiveBayesModel.bin}
 * @param conf   Hadoop configuration used to resolve the file system
 * @return the deserialized, validated model
 * @throws IOException if the model file cannot be opened or read
 */
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    float alphaI;
    Vector featureWeights;
    Vector labelWeights;
    Vector thetaNormalizer;
    Matrix labelFeatureWeights;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        featureWeights = VectorWritable.readVector(in);
        labelWeights = new DenseVector(VectorWritable.readVector(in));
        thetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        // One sparse row of feature weights per label, in label order.
        labelFeatureWeights = new SparseRowMatrix(labelWeights.size(), featureWeights.size());
        int numLabels = labelFeatureWeights.numRows();
        for (int label = 0; label < numLabels; label++) {
            labelFeatureWeights.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        // swallowIOException=true: a failure while closing should not mask a read error.
        Closeables.close(in, true);
    }

    NaiveBayesModel model = new NaiveBayesModel(labelFeatureWeights, featureWeights, labelWeights,
            thetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

/**
 * Loads a serialized naive Bayes model from a classpath resource.
 *
 * <p>Binary layout matches {@link #materialize}: alphaI (float), feature-weight
 * vector, label-weight vector, per-label theta normalizer vector, then one
 * weight vector per label.
 *
 * @param modelfile classpath-relative name of the serialized model resource
 * @return the deserialized, validated model
 * @throws IOException if the resource is missing or cannot be read
 */
public static NaiveBayesModel materializeLocal(String modelfile) throws IOException {

    Vector weightsPerLabel;
    Vector perLabelThetaNormalizer;
    Vector weightsPerFeature;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    System.out.println(modelfile);
    ClassLoader loader = NaiveBayesModel.class.getClassLoader();
    InputStream sin = loader.getResourceAsStream(modelfile);
    // getResourceAsStream returns null (not an exception) when the resource is
    // absent; fail fast with a clear message instead of an NPE on the first read.
    if (sin == null) {
        throw new IOException("Model resource not found on classpath: " + modelfile);
    }
    DataInputStream in = new DataInputStream(sin);
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        // One sparse row of feature weights per label, in label order.
        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        in.close();
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java

License:Apache License

/**
 * Evaluates item-based rating prediction (weighted-sum over similar items, on
 * top of a mu + user-bias + item-bias baseline) against a held-out set, and
 * reports MAE/RMSE for both the full model and the baseline alone.
 *
 * <p>Assumes the training set is ordered by user and that the holdout set
 * contains exactly 10 ratings per user, in the same user order.
 */
public static void main(String[] args) throws IOException {

    int numUsers = 1823179;
    int numItems = 136736;
    // Global rating mean of the training data.
    double mu = 3.157255412010664;

    String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
    String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
    String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
    String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
    // BUGFIX: was "home/ssc/..." (relative); all sibling paths are absolute.
    String holdoutSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

    Matrix similarities = new SparseRowMatrix(numItems, numItems);

    System.out.println("Reading similarities...");
    int similaritiesRead = 0;
    Configuration conf = new Configuration();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {

        int item = pair.getFirst().get();
        Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();

        while (elements.hasNext()) {
            Vector.Element elem = elements.next();
            similarities.setQuick(item, elem.index(), elem.get());
            similaritiesRead++;
        }
    }
    System.out.println("Found " + similaritiesRead + " similarities");

    Pattern sep = Pattern.compile("\t");

    double[] itemBiases = new double[numItems];
    double[] userBiases = new double[numUsers];

    System.out.println("Reading item biases");
    for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
        String[] parts = sep.split(line);
        itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    System.out.println("Reading user biases");
    for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
        String[] parts = sep.split(line);
        userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
    Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

    int currentUser = 0;
    // Training-set preferences of the user currently being accumulated.
    OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

    int usersProcessed = 0;
    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();

    RunningAverage rmseBase = new FullRunningAverage();
    RunningAverage maeBase = new FullRunningAverage();

    while (trainRatings.hasNext()) {
        Rating rating = trainRatings.next();
        // User boundary reached: evaluate the finished user's holdout ratings.
        if (rating.user() != currentUser) {

            for (int n = 0; n < 10; n++) {
                Rating heldOutRating = heldOutRatings.next();
                Preconditions.checkState(heldOutRating.user() == currentUser);

                double preference = 0.0;
                double totalSimilarity = 0.0;
                int count = 0;

                // Weighted sum of the user's baseline-centered ratings of items
                // similar to the held-out item.
                Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
                        .iterateNonZero();
                while (similarItems.hasNext()) {
                    Vector.Element similarity = similarItems.next();
                    int similarItem = similarity.index();
                    if (prefs.containsKey(similarItem)) {
                        preference += similarity.get() * (prefs.get(similarItem)
                                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
                        totalSimilarity += Math.abs(similarity.get());
                        count++;

                    }
                }

                double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
                double estimate = baselineEstimate;

                // Require at least two neighbors before trusting the similarity term.
                if (count > 1) {
                    estimate += preference / totalSimilarity;
                }

                double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
                maeBase.addDatum(baseError);
                rmseBase.addDatum(baseError * baseError);

                double error = Math.abs(heldOutRating.rating() - estimate);
                mae.addDatum(error);
                rmse.addDatum(error * error);

            }

            if (++usersProcessed % 10000 == 0) {
                System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
                        + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
                        + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
            }

            currentUser = rating.user();
            prefs.clear();

        }
        prefs.put(rating.item(), rating.rating());

    }

    System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE "
            + Math.sqrt(rmseBase.getAverage()));
}

From source file:io.ssc.relationdiscovery.SVD.java

License:Open Source License

/**
 * Projects every row of A onto the rank-dimensional space spanned by the
 * weighted singular vectors.
 *
 * <p>Entry (i, r) of the result is singularVectors[r].weight * dot(A_row_i,
 * singularVectors[r]).
 *
 * @return a (A.numRows() x rank) matrix of projection weights
 */
public Matrix projectRowsOntoFeatureSpace() {

    int numPatterns = A.numRows();
    SparseRowMatrix projection = new SparseRowMatrix(numPatterns, rank);

    for (int row = 0; row < numPatterns; row++) {
        Vector occurrences = A.viewRow(row);
        for (int feature = 0; feature < rank; feature++) {
            WeightedVector singularVector = singularVectors.get(feature);
            projection.setQuick(row, feature,
                    singularVector.getWeight() * occurrences.dot(singularVector));
        }
    }
    return projection;
}

From source file:io.ssc.relationdiscovery.Utils.java

License:Open Source License

/**
 * Parses a sparse occurrence file into a (numRows x numColumns) matrix.
 *
 * <p>Each line is {@code <entityIndex>\t<patternIndex>:<value>\t...}; indices
 * in the file are 1-based, the matrix is 0-based (rows are patterns, columns
 * are entities). Lines without any pattern:value pairs are skipped.
 *
 * @throws IOException if the file cannot be read
 */
public static Matrix loadOccurrences(File occurrences, int numRows, int numColumns) throws IOException {

    Matrix A = new SparseRowMatrix(numRows, numColumns);

    Pattern fieldSplitter = Pattern.compile("\t");
    Pattern pairSplitter = Pattern.compile(":");
    for (String line : new FileLineIterable(occurrences)) {
        String[] fields = fieldSplitter.split(line);
        if (fields.length <= 1) {
            continue; // no pattern:value pairs on this line
        }
        int entityIndex = Integer.parseInt(fields[0]);
        for (int i = 1; i < fields.length; i++) {
            String[] pair = pairSplitter.split(fields[i]);
            int patternIndex = Integer.parseInt(pair[0]);
            double value = Double.parseDouble(pair[1]);
            // Shift both indices from the file's 1-based scheme to 0-based.
            A.setQuick(patternIndex - 1, entityIndex - 1, value);
        }
    }
    return A;
}

From source file:org.pigml.classify.naivebayes.NaiveBayesModel.java

License:Apache License

/**
 * Reconstructs a naive Bayes model from the sequence files under a model
 * directory: "label_weights" and "feature_weights" (int -> double pairs) and
 * "label_feature_weights" (label id -> per-feature weight vector).
 *
 * @param modelDir directory holding the three model subdirectories
 * @param conf     Hadoop configuration used to read the sequence files
 * @return the validated model
 * @throws IOException if the model files are missing, empty, or unreadable
 */
public static NaiveBayesModel materialize(Path modelDir, Configuration conf) throws IOException {
    OpenIntDoubleHashMap weightsPerLabel = new OpenIntDoubleHashMap();
    OpenIntDoubleHashMap weightsPerFeature = new OpenIntDoubleHashMap();

    SequenceFileDirIterable<IntWritable, DoubleWritable> kvs;
    kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "label_weights"),
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, DoubleWritable> kv : kvs) {
        weightsPerLabel.put(kv.getFirst().get(), kv.getSecond().get());
    }

    kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "feature_weights"),
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, DoubleWritable> kv : kvs) {
        weightsPerFeature.put(kv.getFirst().get(), kv.getSecond().get());
    }

    // The matrix is allocated lazily because the feature count is only known
    // once the first label vector has been read.
    Matrix weightsPerLabelAndFeature = null;
    SequenceFileDirIterable<IntWritable, VectorWritable> labelVectors = new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(modelDir, "label_feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, VectorWritable> labelVector : labelVectors) {
        int label = labelVector.getFirst().get();
        Vector vector = labelVector.getSecond().get();
        if (weightsPerLabelAndFeature == null) {
            weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), vector.size());
        }
        weightsPerLabelAndFeature.assignRow(label, vector);
    }
    // Fail fast with context rather than passing null into the model and
    // triggering an NPE later.
    if (weightsPerLabelAndFeature == null) {
        throw new IOException("No label feature weights found under " + new Path(modelDir, "label_feature_weights"));
    }

    // TODO alphaI is hard-coded to 1.0
    // TODO perLabelThetaNormalizer is not supported yet
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            1.0f);
    model.validate();
    return model;
}