Example usage for org.apache.mahout.math Matrix assignRow

Introduction

On this page you can find example usage of org.apache.mahout.math Matrix assignRow.

Prototype

Matrix assignRow(int row, Vector other);

Document

Assigns the values of the other vector to the specified row of the receiver.
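
Before the full examples, here is a minimal, self-contained sketch (not taken from any of the source files below; the class name and the matrix and vector sizes are illustrative assumptions) showing how assignRow fills a DenseMatrix row by row. The vector's cardinality must match the matrix's column count, otherwise assignRow throws a CardinalityException.

import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.Vector;

public class AssignRowSketch {
    public static void main(String[] args) {
        // Illustrative sizes only: a 3x4 matrix filled row by row.
        Matrix m = new DenseMatrix(3, 4);
        for (int row = 0; row < m.numRows(); row++) {
            // Each vector's size must equal the matrix's column count.
            Vector v = new DenseVector(new double[] { row, row + 1, row + 2, row + 3 });
            m.assignRow(row, v);
        }
        System.out.println(m);
    }
}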

Usage

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        Closeables.close(in, true);
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materializeLocal(String modelfile) throws IOException {

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    System.out.println(modelfile);
    ClassLoader loader = NaiveBayesModel.class.getClassLoader();
    InputStream sin = loader.getResourceAsStream(modelfile);
    DataInputStream in = new DataInputStream(sin);
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        in.close();
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}

From source file:com.skp.experiment.math.als.hadoop.ImplicitFeedbackAlternatingLeastSquaresReasonSolver.java

License:Apache License

private Matrix YtransposeY(OpenIntObjectHashMap<Vector> Y) {

    Matrix compactedY = new DenseMatrix(Y.size(), numFeatures);
    IntArrayList indexes = Y.keys();
    indexes.quickSort();

    int row = 0;
    for (int index : indexes.elements()) {
        compactedY.assignRow(row++, Y.get(index));
    }

    return compactedY.transpose().times(compactedY);
}

From source file:io.ssc.relationdiscovery.SVD.java

License:Open Source License

public void compute() {

    new LanczosSolver().solve(lanczosState, rank + OVERSHOOT, false);

    Matrix singularVectorCandidates = new DenseMatrix(rank + OVERSHOOT, A.numCols());
    for (int n = 0; n < rank + OVERSHOOT; n++) {
        singularVectorCandidates.assignRow(n, lanczosState.getRightSingularVector(n));
    }

    findSingularVectors(singularVectorCandidates);
}

From source file:mlbench.bayes.BayesUtils.java

License:Apache License

public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {

    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text,VectorWritable> entry : new
     * SequenceFileDirIterable<Text,VectorWritable>( new Path(base,
     * TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(),
     * conf)) { if (entry.getFirst().toString().equals(TrainNaiveBayesJob.
     * LABEL_THETA_NORMALIZER)) { perlabelThetaNormalizer =
     * entry.getSecond().get(); } }
     * 
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}

From source file:org.pigml.classify.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materialize(Path modelDir, Configuration conf) throws IOException {
    OpenIntDoubleHashMap weightsPerLabel = new OpenIntDoubleHashMap();
    OpenIntDoubleHashMap weightsPerFeature = new OpenIntDoubleHashMap();

    SequenceFileDirIterable<IntWritable, DoubleWritable> kvs;
    kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "label_weights"),
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, DoubleWritable> kv : kvs) {
        weightsPerLabel.put(kv.getFirst().get(), kv.getSecond().get());
    }

    kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "feature_weights"),
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, DoubleWritable> kv : kvs) {
        weightsPerFeature.put(kv.getFirst().get(), kv.getSecond().get());
    }

    Matrix weightsPerLabelAndFeature = null;
    SequenceFileDirIterable<IntWritable, VectorWritable> labelVectors = new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(modelDir, "label_feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (Pair<IntWritable, VectorWritable> labelVector : labelVectors) {
        int label = labelVector.getFirst().get();
        Vector vector = labelVector.getSecond().get();
        if (weightsPerLabelAndFeature == null) {
            weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), vector.size());
        }
        weightsPerLabelAndFeature.assignRow(label, vector);
    }

    // TODO alphaI is hard-coded to 1.0
    // TODO perLabelThetaNormalizer is not supported yet
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            1.0f);
    model.validate();
    return model;
}

From source file:org.qcri.pca.PCATest.java

License:Apache License

@Test
public void crossTestIterationOfMapReducePPCASequentialPPCA() throws Exception {
    Matrix C_central = PCACommon.randomMatrix(D, d);
    double ss = PCACommon.randSS();
    InitialValues initValSeq = new InitialValues(C_central, ss);
    InitialValues initValMR = new InitialValues(C_central.clone(), ss);

    //1. run sequential
    Matrix Ye_central = new DenseMatrix(N, D);
    int row = 0;
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null,
            conf)) {
        Ye_central.assignRow(row, vw.get());
        row++;
    }
    double bishopSeqErr = ppcaDriver.runSequential(conf, Ye_central, initValSeq, 1);

    //2. run mapreduce
    DistributedRowMatrix Ye = new DistributedRowMatrix(input, tmp, N, D);
    Ye.setConf(conf);
    double bishopMRErr = ppcaDriver.runMapReduce(conf, Ye, initValMR, output, N, D, d, 1, 1, 1, 1);

    Assert.assertEquals("ss value is different in sequential and mapreduce PCA", initValSeq.ss, initValMR.ss,
            EPSILON);
    double seqCTrace = PCACommon.trace(initValSeq.C);
    double mrCTrace = PCACommon.trace(initValMR.C);
    Assert.assertEquals("C value is different in sequential and mapreduce PCA", seqCTrace, mrCTrace, EPSILON);
    Assert.assertEquals("The PPCA error between sequntial and mapreduce methods is too different: "
            + bishopSeqErr + "!= " + bishopMRErr, bishopSeqErr, bishopMRErr, EPSILON);
}

From source file:org.qcri.pca.SPCADriver.java

/***
 * PPCA: sequential PPCA based on the paper from Tipping and Bishop
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Path input, Path output, final int nRows, final int nCols,
        final int nPCs) throws Exception {
    Matrix centralY = new DenseMatrix(nRows, nCols);
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    if (fs.listStatus(input).length == 0) {
        System.err.println("No file under " + input);
        return 0;
    }
    int row = 0;
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null,
            conf)) {
        centralY.assignRow(row, vw.get());
        row++;
    }
    Matrix centralC = PCACommon.randomMatrix(nCols, nPCs);
    double ss = PCACommon.randSS();
    InitialValues initVal = new InitialValues(centralC, ss);
    // Matrix sampledYe = sample(centralY);
    // runSequential(conf, sampledYe, initVal, 100);
    double error = runSequential(conf, centralY, initVal, 100);
    return error;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * PPCA: sequential PPCA based on the matlab implementation of Jacob Verbeek
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error
 * @throws Exception
 */
double runSequential_JacobVersion(Configuration conf, Path input, Path output, final int nRows, final int nCols,
        final int nPCs) throws Exception {
    Matrix centralY = new DenseMatrix(nRows, nCols);
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    if (fs.listStatus(input).length == 0) {
        System.err.println("No file under " + input);
        return 0;
    }
    int row = 0;
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null,
            conf)) {
        centralY.assignRow(row, vw.get());
        row++;
    }
    Matrix C = PCACommon.randomMatrix(nCols, nPCs);
    double ss = PCACommon.randSS();
    InitialValues initVal = new InitialValues(C, ss);
    double error = runSequential_JacobVersion(conf, centralY, initVal, 100);
    return error;
}