Example usage for org.apache.mahout.math MatrixSlice index

List of usage examples for org.apache.mahout.math MatrixSlice index

Introduction

On this page you can find example usages of org.apache.mahout.math MatrixSlice index().

Prototype

int index()
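
A minimal sketch of typical usage (the class name and sample data below are illustrative, not taken from the examples on this page): iterating a Mahout Matrix yields one MatrixSlice per row, and index() returns that slice's row number.

import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixSlice;

public class MatrixSliceIndexExample {
    public static void main(String[] args) {
        // 2 x 2 sample matrix; the values are arbitrary
        Matrix m = new DenseMatrix(new double[][] { { 1.0, 2.0 }, { 3.0, 4.0 } });
        // each slice is one row; index() is the row number of that slice
        for (MatrixSlice slice : m) {
            System.out.println("row " + slice.index() + " = " + slice.vector());
        }
    }
}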

Usage

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

private static Vector viewRowSums(Matrix m) {
    Vector v = new DenseVector(m.numRows());
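    // each MatrixSlice is one row; index() gives its row number, norm(1) the row sum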
    for (MatrixSlice slice : m) {
        v.set(slice.index(), slice.vector().norm(1));
    }
    return v;
}

From source file:com.elex.dmp.lda.CachingCVB0Mapper.java

License:Apache License

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    log.info("Stopping model trainer");
    modelTrainer.stop();

    log.info("Writing model");
    TopicModel model = modelTrainer.getReadModel();
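    // each slice of the model is one topic; index() is the topic id used as the output key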
    for (MatrixSlice topic : model) {
        context.write(new Text(Integer.toString(topic.index())), new VectorWritable(topic.vector()));
    }
}

From source file:com.elex.dmp.lda.ModelTrainer.java

License:Apache License

public double calculatePerplexity(VectorIterable matrix, VectorIterable docTopicCounts, double testFraction) {
    Iterator<MatrixSlice> docIterator = matrix.iterator();
    Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator();
    double perplexity = 0;
    double matrixNorm = 0;
    while (docIterator.hasNext() && docTopicIterator.hasNext()) {
        MatrixSlice docSlice = docIterator.next();
        MatrixSlice topicSlice = docTopicIterator.next();
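        // index() identifies the document, used below to subsample a test fraction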
        int docId = docSlice.index();
        Vector document = docSlice.vector();
        Vector topicDist = topicSlice.vector();
        if (testFraction == 0 || docId % (1 / testFraction) == 0) {
            trainSync(document, topicDist, false, 10);
            perplexity += readModel.perplexity(document, topicDist);
            matrixNorm += document.norm(1);
        }
    }
    return perplexity / matrixNorm;
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java

License:Apache License

/**
 * small integration test that runs the full job
 *
 * <pre>
 *
 *  user-item-matrix
 *
 *          burger  hotdog  berries  icecream
 *  dog       5       5        2        -
 *  rabbit    2       -        3        5
 *  cow       -       5        -        3
 *  donkey    3       -        -        5
 *
 * </pre>
 */
@Test
public void completeJobToyExample() throws Exception {

    Double na = Double.NaN;
    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, na }),
                    new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }),
                    new DenseVector(new double[] { na, 5.0, na, 3.0 }),
                    new DenseVector(new double[] { 3.0, na, na, 5.0 }) });

    writeLines(inputFile, preferencesAsText(preferences));
    indexSizeFile.deleteOnExit();
    writeLines(indexSizeFile, "0,4\n1,4");

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;

    alsFactorization
            .run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
                    "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures",
                    String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations),
                    "--indexSizes", indexSizeFile.toString(), "--useTransform", "false" });
    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            preferences.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            preferences.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero();
        while (elementIterator.hasNext()) {
            Vector.Element e = elementIterator.next();
            if (!Double.isNaN(e.get())) {
                double pref = e.get();
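                // slice.index() is the user row in U, e.index() the item row in M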
                double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
                double err = pref - estimate;
                avg.addDatum(err * err);
                log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]",
                        new Object[] { slice.index(), e.index(), pref, estimate });
            }
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);

    assertTrue(rmse < 0.2);
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java

License:Apache License

@Test
public void completeJobImplicitToyExample() throws Exception {

    Matrix observations = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
                    new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }),
                    new DenseVector(new double[] { 0, 5.0, 0, 3.0 }),
                    new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) });

    Matrix preferences = new SparseRowMatrix(4, 4,
            new Vector[] { new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
                    new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }),
                    new DenseVector(new double[] { 0, 1.0, 0, 1.0 }),
                    new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) });

    writeLines(inputFile, preferencesAsText(observations));
    writeLines(indexSizeFile, "0,4\n1,4");
    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;
    double alpha = 20;

    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output",
            outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda",
            String.valueOf(lambda), "--implicitFeedback", String.valueOf(true), "--alpha",
            String.valueOf(alpha), "--numFeatures", String.valueOf(numFeatures), "--numIterations",
            String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform",
            "false" });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
            observations.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
            observations.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nObservations - users x items\n");
    info.append(MathHelper.nice(observations));
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        for (Vector.Element e : slice.vector()) {
            if (!Double.isNaN(e.get())) {
                double pref = e.get();
                double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index()));
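                // implicit-feedback confidence weighting: 1 + alpha * observed value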
                double confidence = 1 + alpha * observations.getQuick(slice.index(), e.index());
                double err = confidence * (pref - estimate) * (pref - estimate);
                avg.addDatum(err);
                log.info(
                        "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] "
                                + "estimate is [{}]",
                        new Object[] { slice.index(), e.index(), pref, confidence, estimate });
            }
        }
    }
    double rmse = Math.sqrt(avg.getAverage());
    log.info("RMSE: {}", rmse);

    assertTrue(rmse < 0.4);
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java

License:Apache License

protected static String preferencesAsText(Matrix preferences) {
    StringBuilder prefsAsText = new StringBuilder();
    String separator = "";
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero();
        while (elementIterator.hasNext()) {
            Vector.Element e = elementIterator.next();
            if (!Double.isNaN(e.get())) {
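                // emit one "row,column,value" triple per preference, keyed by slice.index()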
                prefsAsText.append(separator).append(slice.index()).append(',').append(e.index()).append(',')
                        .append(e.get());
                separator = "\n";
            }
        }
    }
    return prefsAsText.toString();
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/***
 * If the MapDir matrix is small, we can convert it to an in memory representation
 * and then run efficient centralized operations
 *
 * @param origMtx in MapDir format (generated by MatrixOutputFormat)
 * @return a dense matrix including the data
 * @throws IOException 
 */
public static DenseMatrix toDenseMatrix(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    DenseMatrix mtx = new DenseMatrix(origMtx.numRows(), origMtx.numCols());
    Iterator<MatrixSlice> sliceIterator;
    try {
        sliceIterator = mapDir.iterateAll();
    } catch (Exception e) {
        log.info(e.toString());
        log.info("Input is not in matrix format, trying SequenceFileFormat instead ...");
        sliceIterator = origMtx.iterateAll();
    }
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        // copy the slice's row vector into the in-memory matrix at its row index
        mtx.viewRow(slice.index()).assign(slice.vector());
    }
    mapDir.close();
    return mtx;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/***
 * If the MapDir matrix is small, we can convert it to an in memory representation
 * and then run efficient centralized operations
 *
 * @param origMtx in MapDir format (generated by MatrixOutputFormat)
 * @return a dense matrix including the data
 * @throws IOException 
 */
static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols());
    Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
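        // copy each row vector into the sparse matrix at its original row index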
        mtx.viewRow(slice.index()).assign(slice.vector());
    }
    mapDir.close();
    return mtx;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Trace of a matrix obtained in a centralized way. For some reason, which I did not have time to debug, it raises a memory exception for big matrices.
 *
 * TODO: MapReduce job for traces of big matrices.
 * @param origMtx
 * @return trace of the input matrix
 * @throws IOException
 */
static double trace(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
    double trace = 0;
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        int index = slice.index();
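        // the diagonal entry of row i sits at column i == slice.index()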
        if (index >= slice.vector().size())
            break;
        double value = slice.vector().get(index);
        trace += Double.isNaN(value) ? 0 : value;
    }
    mapDir.close();
    return trace;
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

public static void testIterator(DistributedRowMatrix origMtx, Path inPath) throws IOException {
    Configuration conf = new Configuration();
    MapDir mapDir = new MapDir(conf, inPath);

    Iterator<MatrixSlice> sliceIterator = origMtx.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        int index = slice.index();
        System.out.println("A[" + index + "] = " + slice.vector());

        IntWritable key = new IntWritable(index);
        VectorWritable vw = new VectorWritable();
    vw = mapDir.get(key, vw);
        System.out.println("B[" + index + "] = " + vw);
    }
    mapDir.close();
}