List of usage examples for the org.apache.mahout.math.SparseMatrix constructor
public SparseMatrix(int rows, int columns)
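Before the examples from real projects below, here is a minimal standalone sketch of the constructor itself: cells default to zero until set, and rows can be read back as sparse vector views. The class name SparseMatrixBasics, the dimensions, and the values are arbitrary choices for illustration.

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class SparseMatrixBasics {
    public static void main(String[] args) {
        // Allocate a 1000 x 500 sparse matrix; cells not set remain zero.
        Matrix mat = new SparseMatrix(1000, 500);

        // Set a handful of non-zero cells.
        mat.set(0, 0, 1.0);
        mat.set(42, 7, 3.5);
        mat.set(999, 499, -2.0);

        // Read a single cell and a sparse view of a whole row.
        System.out.println(mat.get(42, 7));                  // 3.5
        Vector row = mat.viewRow(42);
        System.out.println(row.getNumNondefaultElements());  // 1
    }
}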
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void main(String[] args) throws IOException {
    //final Configuration conf = new Configuration();
    //final FileSystem fs = FileSystem.get(conf);
    //final SequenceFile.Reader reader = new SequenceFile.Reader(fs,
    //        new Path("R1.dat"), conf);
    //IntWritable key = new IntWritable();
    //VectorWritable vec = new VectorWritable();

    Matrix mat = new SparseMatrix(1500, 100);
    //SparseRealMatrix mat2 = new OpenMapRealMatrix(12419, 1500);
    BufferedReader reader = new BufferedReader(new FileReader("R.3.csv"));
    String line = null;
    while ((line = reader.readLine()) != null) {
        String[] parts = line.split(",");
        mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2]));
        /*
        Vector v = vec.get();
        int i = 0;
        Iterator<Vector.Element> itr = v.iterateNonZero();
        while (itr.hasNext()) {
            double elem = itr.next().get();
            if (elem != 0)
                mat2.setEntry(i, key.get(), elem);
            i++;
        }
        */
    }
    //mat = mat.transpose();
    System.out.println(mat.viewColumn(0).isDense());
    System.out.println(mat.viewRow(0).isDense());
    mat = mat.transpose();
    GramSchmidt.orthonormalizeColumns(mat);
    /*
    System.out.println("started QR");
    System.out.println(Runtime.getRuntime().maxMemory());
    System.out.println(Runtime.getRuntime().maxMemory() - Runtime.getRuntime().freeMemory());
    QRDecomposition qr = new QRDecomposition(mat2);
    System.out.println(qr.getQ().getColumnDimension());
    System.out.println(qr.getQ().getRowDimension());
    */
    //mat = mat.transpose();
    //storeSparseColumns(mat);
    //for (int i = 0; i < 10; i++) {
    //    System.out.println(mat.viewRow(i).getNumNondefaultElements());
    //}
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * If the MapDir matrix is small, we can convert it to an in-memory representation
 * and then run efficient centralized operations.
 *
 * @param origMtx in MapDir format (generated by MatrixOutputFormat)
 * @return a sparse in-memory matrix containing the data
 * @throws IOException
 */
static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols());
    Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        mtx.viewRow(slice.index()).assign(slice.vector());
    }
    mapDir.close();
    return mtx;
}
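Once the data sits in a plain in-memory SparseMatrix, centralized Mahout-math operations can be applied to it directly. The sketch below is a hedged illustration of that second step: it builds a small SparseMatrix by hand as a stand-in for the MapDir-loaded matrix (toSparseMatrix above is package-private and requires a Hadoop Configuration and row path) and computes the Gram matrix A^T * A centrally. The class name CentralizedOps and all dimensions and values are made up.

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;

public class CentralizedOps {
    public static void main(String[] args) {
        // Stand-in for a matrix that would come out of toSparseMatrix(...).
        Matrix mtx = new SparseMatrix(4, 3);
        mtx.set(0, 0, 2.0);
        mtx.set(1, 2, 1.5);
        mtx.set(3, 1, -0.5);

        // Centralized operation on the in-memory copy: Gram matrix A^T * A.
        Matrix gram = mtx.transpose().times(mtx);
        System.out.println(gram.numRows() + " x " + gram.numCols()); // 3 x 3
    }
}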
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Convert a 2-dimensional array to a sparse matrix in {@link MapDir} format.
 * @param vectors a 2-dimensional array of doubles
 * @param outPath the path to which the sparse matrix will be written
 * @param tmpPath an argument required to be passed to {@link DistributedRowMatrix}
 * @param label a unique label to name the output matrix directory
 * @return a {@link DistributedRowMatrix} pointing to the in-filesystem matrix
 * @throws Exception
 */
public static DistributedRowMatrix toSparseMapDir(double[][] vectors, Path outPath, Path tmpPath, String label)
        throws Exception {
    int nRows = vectors.length;
    int nCols = vectors[0].length;
    SparseMatrix m = new SparseMatrix(nRows, nCols);
    for (int r = 0; r < nRows; r++)
        m.set(r, vectors[r]);
    return AlgebraCommon.toMapDir(m, outPath, tmpPath, label);
}
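The in-memory half of that conversion (a double[][] copied into a SparseMatrix row by row with Matrix.set(row, double[])) can be exercised on its own, without the MapDir/HDFS step. A minimal sketch with made-up data; the class name ArrayToSparseMatrix is arbitrary:

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;

public class ArrayToSparseMatrix {
    public static void main(String[] args) {
        double[][] vectors = {
            {1.0, 0.0, 0.0},
            {0.0, 0.0, 2.0}
        };
        Matrix m = new SparseMatrix(vectors.length, vectors[0].length);
        // set(int row, double[] values) copies a whole row at once.
        for (int r = 0; r < vectors.length; r++) {
            m.set(r, vectors[r]);
        }
        System.out.println(m.get(1, 2)); // 2.0
    }
}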
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    // Compute the partial statistics of each cluster
    for (final Vector vector : points) {
        final int dimension = vector.size();
        Matrix outProd = null;
        if (isCovarianceDiagonal) {
            outProd = new SparseMatrix(dimension, dimension);
            for (int j = 0; j < dimension; j++) {
                outProd.set(j, j, vector.get(j) * vector.get(j));
            }
        } else {
            outProd = vector.cross(vector);
        }

        double denominator = 0;
        final double[] numerators = new double[numClusters];
        for (int i = 0; i < numClusters; i++) {
            final ClusterSummary clusterSummary = clusterSummaries.get(i);
            final Vector centroid = clusterSummary.getCentroid();
            final Matrix covariance = clusterSummary.getCovariance();
            final Double prior = clusterSummary.getPrior();

            final Vector differ = vector.minus(centroid);
            numerators[i] = prior / Math.sqrt(covariance.determinant())
                    * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2));
            denominator += numerators[i];
        }

        for (int i = 0; i < numClusters; i++) {
            final double posterior = denominator == 0 ? 1.0 / numerators.length : numerators[i] / denominator;
            if (!clusterToStats.containsKey(i)) {
                clusterToStats.put(i,
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            } else {
                clusterToStats.get(i).add(
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            }
        }
    }
}
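When the covariance is assumed diagonal, the example above stores only the diagonal of the outer product x x^T in a SparseMatrix rather than materializing the full dense cross product. A hedged, standalone sketch of just that trick, with a made-up vector and an arbitrary class name (DiagonalOuterProduct):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class DiagonalOuterProduct {
    public static void main(String[] args) {
        Vector x = new DenseVector(new double[] {1.0, -2.0, 0.5});
        int dimension = x.size();

        // Diagonal-only outer product: stores d entries instead of d * d.
        Matrix outProd = new SparseMatrix(dimension, dimension);
        for (int j = 0; j < dimension; j++) {
            outProd.set(j, j, x.get(j) * x.get(j));
        }

        // Full (dense) outer product for comparison.
        Matrix full = x.cross(x);
        System.out.println(outProd.get(1, 1) + " == " + full.get(1, 1)); // 4.0 == 4.0
    }
}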
From source file:edu.snu.dolphin.bsp.examples.ml.sub.ClusterSummaryListCodec.java
License:Apache License
@Override
public List<ClusterSummary> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<ClusterSummary> resultList = new ArrayList<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int numClusters = dais.readInt();
        final int dimension = dais.readInt();
        for (int i = 0; i < numClusters; i++) {
            final double prior = dais.readDouble();
            final Vector vector = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                vector.set(j, dais.readDouble());
            }
            Matrix matrix = null;
            if (isDiagonalCovariance) {
                matrix = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    matrix.set(j, j, dais.readDouble());
                }
            } else {
                matrix = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        matrix.set(j, k, dais.readDouble());
                    }
                }
            }
            resultList.add(new ClusterSummary(prior, vector, matrix));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultList;
}
From source file:edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntClusterStatsCodec.java
License:Apache License
@Override
public Map<Integer, ClusterStats> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, ClusterStats> resultMap = new HashMap<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        final int dimension = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int id = dais.readInt();
            final double probSum = dais.readDouble();
            final Vector pointSum = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                pointSum.set(j, dais.readDouble());
            }
            Matrix outProdSum = null;
            if (diagonalCovariance) {
                outProdSum = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    outProdSum.set(j, j, dais.readDouble());
                }
            } else {
                outProdSum = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        outProdSum.set(j, k, dais.readDouble());
                    }
                }
            }
            resultMap.put(id, new ClusterStats(outProdSum, pointSum, probSum));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultMap;
}
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {
    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text, VectorWritable> entry : new SequenceFileDirIterable<Text, VectorWritable>(
     *         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
     *     if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
     *         perlabelThetaNormalizer = entry.getSecond().get();
     *     }
     * }
     *
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}
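The weight matrix above is filled row by row with assignRow, which replaces an entire row of the SparseMatrix with a given vector. A hedged, purely in-memory sketch of that pattern, with made-up label/feature counts and no SequenceFile input; the class name AssignRowSketch is arbitrary:

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class AssignRowSketch {
    public static void main(String[] args) {
        int numLabels = 3;
        int numFeatures = 5;
        Matrix scoresPerLabelAndFeature = new SparseMatrix(numLabels, numFeatures);

        // Build a sparse row and install it wholesale as the row for label 1.
        Vector row = new RandomAccessSparseVector(numFeatures);
        row.set(0, 4.0);
        row.set(3, 1.0);
        scoresPerLabelAndFeature.assignRow(1, row);

        System.out.println(scoresPerLabelAndFeature.get(1, 0)); // 4.0
        System.out.println(scoresPerLabelAndFeature.get(1, 3)); // 1.0
    }
}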
From source file:org.qcri.pca.MahoutCompatibilityTest.java
License:Apache License
@Test
public void testMAHOUT_1221() {
    // create a matrix with an unassigned row 0
    Matrix matrix = new SparseMatrix(1, 1);
    Vector view = matrix.viewRow(0);
    final double value = 1.23;
    view.assign(value);
    // test whether the update in the view is reflected in the matrix
    assertEquals("Matrix value", view.getQuick(0), matrix.getQuick(0, 0), EPSILON);
}