Example usage for org.apache.mahout.math Matrix set

List of usage examples for org.apache.mahout.math Matrix set

Introduction

On this page you can find example usages of the org.apache.mahout.math Matrix set method.

Prototype

void set(String rowLabel, int row, double[] rowData);

Source Link

Document

Sets the row values at the given row index and updates the row labels

Usage

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

/**
 * Loads matrix entries from the file {@code R.3.csv}, prints whether the first column view
 * and the first row view are dense, then transposes the matrix and orthonormalizes its columns.
 *
 * <p>Each input line is split on commas; the first three fields are parsed as the row index,
 * the column index and the cell value, in that order.
 *
 * @param args command-line arguments (not read)
 * @throws IOException if the input file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    Matrix mat = new SparseMatrix(1500, 100);

    // try-with-resources guarantees the file handle is released even if a line fails to parse
    try (BufferedReader reader = new BufferedReader(new FileReader("R.3.csv"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split(",");
            mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2]));
        }
    }

    System.out.println(mat.viewColumn(0).isDense());
    System.out.println(mat.viewRow(0).isDense());

    // orthonormalizeColumns works on columns, so the loaded matrix is transposed first
    mat = mat.transpose();
    GramSchmidt.orthonormalizeColumns(mat);
}

From source file:com.cloudera.knittingboar.messages.TestParameterVector.java

License:Apache License

/**
 * Round-trips a {@code ParameterVector} through {@code Serialize()} and {@code Deserialize()}
 * and checks that the pass count, a sample of matrix cells and the summary fields survive.
 */
public void testSerde() throws IOException {

    final int numClasses = 20;
    final int numFeatures = 10000;

    final Matrix weights = new DenseMatrix(numClasses, numFeatures);

    // every row except the last gets column f set to f / 10; the last row is not written
    final int rowLimit = numClasses - 1;
    for (int row = 0; row < rowLimit; row++) {
        for (int col = 0; col < numFeatures; col++) {
            weights.set(row, col, col / 10.0d);
        }
    }

    System.out.println("matrix created...");

    final ParameterVector original = new ParameterVector();
    original.SrcWorkerPassCount = pass_count;
    original.parameter_vector = weights;
    original.AvgLogLikelihood = -1.368f;
    original.PercentCorrect = 72.68f;
    original.TrainedRecords = 2500;

    // dimensions as reported by the wrapper and by the matrix itself
    assertEquals(10000, original.numFeatures());
    assertEquals(10000, original.parameter_vector.columnSize());

    assertEquals(20, original.numCategories());
    assertEquals(20, original.parameter_vector.rowSize());

    final byte[] wire = original.Serialize();

    final ParameterVector restored = new ParameterVector();
    restored.Deserialize(wire);

    assertEquals(pass_count, restored.SrcWorkerPassCount);

    // columns 1..5 of row 0 were written as 0.1 .. 0.5
    final double[] expectedCells = { 0.1, 0.2, 0.3, 0.4, 0.5 };
    for (int i = 0; i < expectedCells.length; i++) {
        assertEquals(expectedCells[i], restored.parameter_vector.get(0, i + 1));
    }

    assertEquals(-1.368f, restored.AvgLogLikelihood);
    assertEquals(72.68f, restored.PercentCorrect);
    assertEquals(2500, restored.TrainedRecords);

}

From source file:com.cloudera.knittingboar.messages.TestParameterVectorGradient.java

License:Apache License

/**
 * Round-trips a {@code ParameterVectorGradient} through {@code Serialize()} and
 * {@code Deserialize()} and checks that the pass count, a sample of matrix cells and the
 * summary fields survive.
 */
public void testSerde() throws IOException {

    final int numClasses = 20;
    final int numFeatures = 10000;

    final Matrix gradient = new DenseMatrix(numClasses, numFeatures);

    // every row except the last gets column f set to f / 10; the last row is not written
    final int rowLimit = numClasses - 1;
    for (int row = 0; row < rowLimit; row++) {
        for (int col = 0; col < numFeatures; col++) {
            gradient.set(row, col, col / 10.0d);
        }
    }

    System.out.println("matrix created...");

    final ParameterVectorGradient outgoing = new ParameterVectorGradient();
    outgoing.SrcWorkerPassCount = pass_count;
    outgoing.parameter_vector = gradient;
    outgoing.AvgLogLikelihood = -1.368f;
    outgoing.PercentCorrect = 72.68f;
    outgoing.TrainedRecords = 2500;

    // dimensions as reported by the wrapper and by the matrix itself
    assertEquals(10000, outgoing.numFeatures());
    assertEquals(10000, outgoing.parameter_vector.columnSize());

    assertEquals(20, outgoing.numCategories());
    assertEquals(20, outgoing.parameter_vector.rowSize());

    final byte[] payload = outgoing.Serialize();

    final ParameterVectorGradient incoming = new ParameterVectorGradient();
    incoming.Deserialize(payload);

    assertEquals(pass_count, incoming.SrcWorkerPassCount);

    // columns 1..5 of row 0 were written as 0.1 .. 0.5
    final double[] expectedCells = { 0.1, 0.2, 0.3, 0.4, 0.5 };
    for (int i = 0; i < expectedCells.length; i++) {
        assertEquals(expectedCells[i], incoming.parameter_vector.get(0, i + 1));
    }

    assertEquals(-1.368f, incoming.AvgLogLikelihood);
    assertEquals(72.68f, incoming.PercentCorrect);
    assertEquals(2500, incoming.TrainedRecords);

}

From source file:com.cloudera.knittingboar.sgd.TestGradientBuffer.java

License:Apache License

/**
 * Checks that {@code GradientBuffer.AccumulateGradient(Matrix)} adds the cells of the given
 * matrix to the cells already stored in the buffer, and that the source matrix still holds
 * the values it was given.
 *
 * <p>Floating-point values are compared with a tolerance, in the same way the sibling
 * averaging test does, so the test does not depend on the sums being bit-exact.
 */
public void testAccumulateGradientMatrix() {

    final double delta = 0.0001;

    GradientBuffer g0 = new GradientBuffer(2, 2);

    g0.setCell(0, 0, 0.4);
    g0.setCell(0, 1, 0.3);

    assertEquals(2, g0.numFeatures());

    Matrix m = new DenseMatrix(2, 2);
    m.set(0, 0, 0.1);
    m.set(0, 1, 0.3);

    g0.AccumulateGradient(m);

    // check source
    junit.framework.Assert.assertEquals(0.1, m.get(0, 0), delta);
    // check accumulation in g0: 0.4 + 0.1
    junit.framework.Assert.assertEquals(0.5, g0.getCell(0, 0), delta);

    // check source
    junit.framework.Assert.assertEquals(0.3, m.get(0, 1), delta);
    // check accumulation in g0: 0.3 + 0.3
    junit.framework.Assert.assertEquals(0.6, g0.getCell(0, 1), delta);

    System.out.println("matrix accumulation test done!");

}

From source file:com.cloudera.knittingboar.sgd.TestGradientBuffer.java

License:Apache License

/**
 * Accumulates one matrix into a {@code GradientBuffer}, checks the first accumulated cell
 * with a tolerance, then calls {@code AverageAccumulations(2)} and prints row 0 of the
 * buffer before and after averaging.
 */
public void testAverageGradientBuffer() {

    System.out.println("testAverageGradientBuffer --------");

    final GradientBuffer buffer = new GradientBuffer(2, 2);
    buffer.setCell(0, 0, 0.1d);
    buffer.setCell(0, 1, 0.5d);

    assertEquals(buffer.numFeatures(), 2);

    final Matrix update = new DenseMatrix(2, 2);
    update.set(0, 0, 0.5d);
    update.set(0, 1, 0.1d);

    buffer.AccumulateGradient(update);

    // the source matrix must still hold what was written to it
    assertEquals(update.get(0, 0), 0.5d);

    // accumulated cell (0.1 + 0.5) is compared with a tolerance
    junit.framework.Assert.assertEquals(0.6d, buffer.getCell(0, 0), 0.0001);

    assertEquals(update.get(0, 1), 0.1d);

    // row 0 before averaging
    Utils.PrintVectorNonZero(buffer.gamma.viewRow(0));

    buffer.AverageAccumulations(2);

    // row 0 after averaging
    Utils.PrintVectorNonZero(buffer.gamma.viewRow(0));

    System.out.println("matrix accumulation AVG test done!");

    assertNotNull(0);

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java

License:Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 * /* w ww .j  a v  a 2  s  .  c  o m*/
 * Tests replacing the beta, presumably from the master, after we've run POLR a bit 
 * @throws Exception 
 */
public void testReplaceBetaMechanics() throws Exception {

    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    System.out.println("split count: " + splits.length);

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // ------------------    
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());

    TextRecordParser txt_reader = new TextRecordParser();

    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);

    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);

    worker_model_builder.setRecordParser(txt_reader);

    //      worker_model_builder.RunNextTrainingBatch();
    worker_model_builder.compute();

    //    worker_model_builder.polr.Set

    // ------------------- now replace beta ------------

    double val1 = -1.0;

    // GradientBuffer g0 = new GradientBuffer( 2, worker_model_builder.FeatureVectorSize );
    Matrix m = new DenseMatrix(2, feature_vector_size);

    for (int x = 0; x < feature_vector_size; x++) {

        m.set(0, x, val1);

    }

    worker_model_builder.polr.SetBeta(m);

    for (int x = 0; x < feature_vector_size; x++) {

        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);

    }

    System.out.println("--------------------------------\n");

}

From source file:com.skp.experiment.common.MathHelper.java

License:Apache License

/**
 * Reads a {@link Matrix} from a {@code SequenceFile<IntWritable,VectorWritable>}.
 *
 * <p>Each record's key is used as the row index, and the elements yielded by the record
 * vector's non-zero iterator are copied into a new {@code rows} x {@code columns} dense matrix.
 *
 * @param conf    configuration handed to the sequence-file iterable
 * @param path    location handed to the sequence-file iterable
 * @param rows    number of rows of the returned matrix
 * @param columns number of columns of the returned matrix
 * @return the populated matrix
 * @throws IllegalStateException if the iteration yields no record at all
 */
public static Matrix readMatrix(Configuration conf, Path path, int rows, int columns) {
    Matrix result = new DenseMatrix(rows, columns);
    boolean sawAnyRow = false;

    SequenceFileIterable<IntWritable, VectorWritable> records =
            new SequenceFileIterable<IntWritable, VectorWritable>(path, true, conf);
    for (Pair<IntWritable, VectorWritable> entry : records) {
        IntWritable rowKey = entry.getFirst();
        VectorWritable rowVector = entry.getSecond();
        sawAnyRow = true;

        int rowIndex = rowKey.get();
        for (Iterator<Vector.Element> cells = rowVector.get().iterateNonZero(); cells.hasNext();) {
            Vector.Element cell = cells.next();
            result.set(rowIndex, cell.index(), cell.get());
        }
    }

    if (sawAnyRow) {
        return result;
    }
    throw new IllegalStateException("Not a single row read!");
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Rebuilds {@code clusterToStats} by folding every point into per-cluster partial statistics.
 *
 * <p>For each point, one weight per cluster summary is computed from the summary's prior, the
 * determinant of its covariance and the quadratic form of the point's offset from the centroid.
 * The weights are normalized into posteriors (a uniform share when they sum to zero), and the
 * posterior-scaled outer product and point are recorded for that cluster.
 *
 * @param iteration iteration number (not read by this method)
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int clusterCount = clusterSummaries.size();

    for (final Vector point : points) {
        final int dim = point.size();

        // outer product of the point with itself; diagonal-only when the covariance is diagonal
        final Matrix outerProduct;
        if (!isCovarianceDiagonal) {
            outerProduct = point.cross(point);
        } else {
            outerProduct = new SparseMatrix(dim, dim);
            for (int d = 0; d < dim; d++) {
                outerProduct.set(d, d, point.get(d) * point.get(d));
            }
        }

        // unnormalized weight of this point under each cluster
        final double[] weights = new double[clusterCount];
        double weightSum = 0;
        for (int k = 0; k < clusterCount; k++) {
            final ClusterSummary summary = clusterSummaries.get(k);
            final Vector centroid = summary.getCentroid();
            final Matrix covariance = summary.getCovariance();
            final Double prior = summary.getPrior();

            final Vector offset = point.minus(centroid);
            final double scale = prior / Math.sqrt(covariance.determinant());
            final double exponent = offset.dot(inverse(covariance).times(offset)) / (-2);
            weights[k] = scale * Math.exp(exponent);
            weightSum += weights[k];
        }

        // normalize and fold this point's contribution into each cluster's statistics
        for (int k = 0; k < clusterCount; k++) {
            final double posterior;
            if (weightSum == 0) {
                posterior = 1.0 / weights.length;
            } else {
                posterior = weights[k] / weightSum;
            }

            final ClusterStats contribution =
                    new ClusterStats(times(outerProduct, posterior), point.times(posterior), posterior, false);
            if (clusterToStats.containsKey(k)) {
                clusterToStats.get(k).add(contribution);
            } else {
                clusterToStats.put(k, contribution);
            }
        }
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Returns a copy of {@code matrix} in which every entry reported as non-zero has been
 * multiplied by {@code scala}.
 * Only the non-zero entries of each row slice are visited; every other entry keeps the value
 * produced by {@code clone()}.
 */
private Matrix times(final Matrix matrix, final double scala) {
    final Matrix scaled = matrix.clone();
    for (final Iterator<MatrixSlice> rows = matrix.iterator(); rows.hasNext();) {
        final MatrixSlice rowSlice = rows.next();
        final int rowIndex = rowSlice.index();
        for (final Vector.Element cell : rowSlice.nonZeroes()) {
            scaled.set(rowIndex, cell.index(), cell.get() * scala);
        }
    }
    return scaled;
}

From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java

License:Apache License

/**
 * Compute the covariance matrix from the statistics.
 *
 * <p>Starts from a copy of {@code outProdSum}. For every entry reported as non-zero at
 * (row, col), the copy is overwritten with that entry divided by {@code probSum}, minus the
 * product of the mean's components at {@code row} and {@code col}.
 *
 * @return the resulting matrix
 */
public Matrix computeCovariance() {
    final Vector mean = computeMean();
    final Matrix result = outProdSum.clone();

    for (final Iterator<MatrixSlice> rows = outProdSum.iterator(); rows.hasNext();) {
        final MatrixSlice rowSlice = rows.next();
        final int rowIndex = rowSlice.index();
        for (final Vector.Element cell : rowSlice.nonZeroes()) {
            final int colIndex = cell.index();
            final double productSum = cell.get();
            final double meanProduct = mean.get(rowIndex) * mean.get(colIndex);
            result.set(rowIndex, colIndex, productSum / probSum - meanProduct);
        }
    }
    return result;
}