Example usage for org.apache.mahout.math SparseMatrix SparseMatrix

List of usage examples for org.apache.mahout.math SparseMatrix SparseMatrix

Introduction

On this page you can find example usages of the org.apache.mahout.math.SparseMatrix constructor SparseMatrix(int rows, int columns).

Prototype

public SparseMatrix(int rows, int columns) 

Source Link

Document

Constructs a matrix with the specified number of rows and columns.

Usage

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

/**
 * Loads a 1500 x 100 sparse matrix from the comma-separated file {@code R.3.csv}, prints
 * whether its first column view and first row view are dense, then transposes the matrix
 * and passes the transpose to {@code GramSchmidt.orthonormalizeColumns}.
 *
 * <p>Each input line is parsed as {@code row,column,value}: two integers followed by a
 * double.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the input file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    Matrix mat = new SparseMatrix(1500, 100);

    // try-with-resources closes the reader even when a line fails to parse or
    // mat.set rejects an index; the original never closed it at all.
    try (BufferedReader reader = new BufferedReader(new FileReader("R.3.csv"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split(",");
            mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2]));
        }
    }

    System.out.println(mat.viewColumn(0).isDense());
    System.out.println(mat.viewRow(0).isDense());

    // Orthonormalization is applied to the columns of the transpose.
    mat = mat.transpose();
    GramSchmidt.orthonormalizeColumns(mat);
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Loads a matrix stored in MapDir format into an in-memory {@link SparseMatrix}. If the
 * MapDir matrix is small, this allows efficient centralized operations to be run on it.
 *
 * @param origMtx the matrix in MapDir format (generated by MatrixOutputFormat)
 * @return a sparse matrix sized {@code origMtx.numRows() x origMtx.numCols()} whose rows
 *         have been assigned from the slices read out of the MapDir
 * @throws IOException if the underlying MapDir access fails
 */
static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException {
    // Allocate the result before opening the MapDir so that a failure here cannot leak it.
    SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols());
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    try {
        Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
        while (sliceIterator.hasNext()) {
            MatrixSlice slice = sliceIterator.next();
            // viewRow returns a view onto mtx, so assigning to it writes into the matrix.
            mtx.viewRow(slice.index()).assign(slice.vector());
        }
    } finally {
        // Release the MapDir even if iteration or assignment throws.
        mapDir.close();
    }
    return mtx;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Copies a 2-dimensional array into a {@link SparseMatrix}, row by row, and delegates to
 * {@code AlgebraCommon.toMapDir} to produce the resulting {@link DistributedRowMatrix}.
 *
 * <p>The column count is taken from the first row, so {@code vectors} must contain at
 * least one row.
 *
 * @param vectors a 2-dimensional array of doubles; one inner array per matrix row
 * @param outPath the path to which the matrix will be written
 * @param tmpPath an argument required to be passed to {@link DistributedRowMatrix}
 * @param label a unique label to name the output matrix directory
 * @return a {@link DistributedRowMatrix} pointing to the in-filesystem matrix
 * @throws Exception if {@code AlgebraCommon.toMapDir} fails
 */
public static DistributedRowMatrix toSparseMapDir(double[][] vectors, Path outPath, Path tmpPath, String label)
        throws Exception {
    final int rowCount = vectors.length;
    final SparseMatrix sparse = new SparseMatrix(rowCount, vectors[0].length);
    int row = 0;
    for (final double[] values : vectors) {
        sparse.set(row, values);
        row++;
    }
    return AlgebraCommon.toMapDir(sparse, outPath, tmpPath, label);
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Rebuilds {@code clusterToStats} from scratch: for every point, computes a weight per
 * cluster from that cluster's prior, covariance and centroid, normalizes the weights into
 * posteriors, and accumulates posterior-scaled statistics of the point into the entry of
 * each cluster.
 *
 * @param iteration the iteration number (not read by this method)
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int clusterCount = clusterSummaries.size();

    for (final Vector point : points) {
        // Outer product of the point with itself; only the diagonal is filled in
        // when the covariance is flagged as diagonal.
        final Matrix outerProduct;
        if (!isCovarianceDiagonal) {
            outerProduct = point.cross(point);
        } else {
            final int dimension = point.size();
            outerProduct = new SparseMatrix(dimension, dimension);
            for (int d = 0; d < dimension; d++) {
                outerProduct.set(d, d, point.get(d) * point.get(d));
            }
        }

        // Unnormalized per-cluster weights and their running sum.
        final double[] weights = new double[clusterCount];
        double weightSum = 0;
        for (int c = 0; c < clusterCount; c++) {
            final ClusterSummary summary = clusterSummaries.get(c);
            final Vector centroid = summary.getCentroid();
            final Matrix covariance = summary.getCovariance();
            final double prior = summary.getPrior();

            final Vector offset = point.minus(centroid);
            final double scale = Math.sqrt(covariance.determinant());
            final double exponent = offset.dot(inverse(covariance).times(offset)) / (-2);
            weights[c] = prior / scale * Math.exp(exponent);
            weightSum += weights[c];
        }

        for (int c = 0; c < clusterCount; c++) {
            // When every weight is zero, fall back to a uniform posterior.
            final double posterior;
            if (weightSum == 0) {
                posterior = 1.0 / weights.length;
            } else {
                posterior = weights[c] / weightSum;
            }

            final ClusterStats contribution =
                    new ClusterStats(times(outerProduct, posterior), point.times(posterior), posterior, false);
            if (clusterToStats.containsKey(c)) {
                clusterToStats.get(c).add(contribution);
            } else {
                clusterToStats.put(c, contribution);
            }
        }
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.sub.ClusterSummaryListCodec.java

License:Apache License

/**
 * Decodes a byte array into a list of {@code ClusterSummary} objects.
 *
 * <p>The bytes are read in this order: an int cluster count, an int dimension, then for
 * each cluster a double prior, {@code dimension} doubles for the vector, and the matrix
 * entries. When {@code isDiagonalCovariance} is set, only the {@code dimension} diagonal
 * entries are read into a {@link SparseMatrix}; otherwise {@code dimension * dimension}
 * entries are read row by row into a {@link DenseMatrix}.
 *
 * @param data the encoded bytes
 * @return the decoded summaries, in the order they were read
 * @throws RuntimeException wrapping the {@link IOException} if the data cannot be read
 */
@Override
public List<ClusterSummary> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<ClusterSummary> resultList = new ArrayList<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int numClusters = dais.readInt();
        final int dimension = dais.readInt();

        for (int i = 0; i < numClusters; i++) {
            final double prior = dais.readDouble();
            final Vector vector = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                vector.set(j, dais.readDouble());
            }
            final Matrix matrix;
            if (isDiagonalCovariance) {
                // Only the diagonal was serialized; off-diagonal entries stay unset.
                matrix = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    matrix.set(j, j, dais.readDouble());
                }
            } else {
                matrix = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        matrix.set(j, k, dais.readDouble());
                    }
                }
            }
            resultList.add(new ClusterSummary(prior, vector, matrix));
        }
    } catch (final IOException e) {
        // Wrap the exception itself: e.getCause() is usually null (e.g. for an
        // EOFException on truncated data), which would discard the failure reason.
        throw new RuntimeException(e);
    }

    return resultList;
}

From source file:edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntClusterStatsCodec.java

License:Apache License

/**
 * Decodes a byte array into a map from integer id to {@code ClusterStats}.
 *
 * <p>The bytes are read in this order: an int entry count, an int dimension, then for each
 * entry an int id, a double probability sum, {@code dimension} doubles for the point sum,
 * and the matrix entries. When {@code diagonalCovariance} is set, only the
 * {@code dimension} diagonal entries are read into a {@link SparseMatrix}; otherwise
 * {@code dimension * dimension} entries are read row by row into a {@link DenseMatrix}.
 *
 * @param data the encoded bytes
 * @return the decoded map; a later entry with a repeated id replaces the earlier one
 * @throws RuntimeException wrapping the {@link IOException} if the data cannot be read
 */
@Override
public Map<Integer, ClusterStats> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, ClusterStats> resultMap = new HashMap<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        final int dimension = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int id = dais.readInt();
            final double probSum = dais.readDouble();
            final Vector pointSum = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                pointSum.set(j, dais.readDouble());
            }
            final Matrix outProdSum;
            if (diagonalCovariance) {
                // Only the diagonal was serialized; off-diagonal entries stay unset.
                outProdSum = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    outProdSum.set(j, j, dais.readDouble());
                }
            } else {
                outProdSum = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        outProdSum.set(j, k, dais.readDouble());
                    }
                }
            }
            resultMap.put(id, new ClusterStats(outProdSum, pointSum, probSum));
        }
    } catch (final IOException e) {
        // Wrap the exception itself: e.getCause() is usually null (e.g. for an
        // EOFException on truncated data), which would discard the failure reason.
        throw new RuntimeException(e);
    }

    return resultMap;
}

From source file:mlbench.bayes.BayesUtils.java

License:Apache License

/**
 * Reads a {@link NaiveBayesModel} from the sequence files under {@code base}.
 *
 * <p>Two directories are read: {@code TrainNaiveBayesJob.WEIGHTS}, which supplies the
 * per-feature and per-label weight vectors, and
 * {@code TrainNaiveBayesJob.SUMMED_OBSERVATIONS}, whose records are assigned as rows of the
 * label-by-feature matrix. The per-label theta normalizer is not read from storage; it is
 * created with {@code scoresPerLabel.like()}.
 *
 * @param base the directory containing the model's sequence files
 * @param conf the configuration used to read the files and to look up
 *        {@code ThetaMapper.ALPHA_I} (defaults to {@code 1.0f} when absent)
 * @return the model assembled from the stored weights and observations
 * @throws IllegalStateException if the weights directory contains no per-feature or no
 *         per-label record
 */
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {

    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Fail fast with a descriptive message instead of an anonymous NullPointerException
    // when sizing the matrix below.
    if (scoresPerFeature == null) {
        throw new IllegalStateException("No '" + TrainNaiveBayesJob.WEIGHTS_PER_FEATURE
                + "' record found under " + new Path(base, TrainNaiveBayesJob.WEIGHTS));
    }
    if (scoresPerLabel == null) {
        throw new IllegalStateException("No '" + TrainNaiveBayesJob.WEIGHTS_PER_LABEL
                + "' record found under " + new Path(base, TrainNaiveBayesJob.WEIGHTS));
    }

    // One row per label, one column per feature; each stored record fills the row
    // named by its integer key.
    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    // The theta normalizer is intentionally not loaded from TrainNaiveBayesJob.THETAS here.
    Vector perlabelThetaNormalizer = scoresPerLabel.like();

    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}

From source file:org.qcri.pca.MahoutCompatibilityTest.java

License:Apache License

/**
 * Regression check for MAHOUT-1221: a value assigned through a row view of a sparse matrix
 * must be readable from the matrix itself, even when that row was never written before.
 */
@Test
public void testMAHOUT_1221() {
    // 1x1 sparse matrix whose only row has not been assigned yet
    final Matrix sparse = new SparseMatrix(1, 1);
    final Vector rowView = sparse.viewRow(0);

    // write through the view only
    rowView.assign(1.23);

    // the matrix cell must agree with what the view reports
    final double fromView = rowView.getQuick(0);
    final double fromMatrix = sparse.getQuick(0, 0);
    assertEquals("Matrix valye", fromView, fromMatrix, EPSILON);
}