List of usage examples for the org.apache.mahout.math.SparseMatrix constructor
public SparseMatrix(int rows, int columns)
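Before the examples from real projects below, here is a minimal standalone sketch of the constructor itself: cells default to zero until set, and rows can be read back as sparse vector views. The class name SparseMatrixBasics, the dimensions, and the values are arbitrary choices for illustration.

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class SparseMatrixBasics {
    public static void main(String[] args) {
        // Allocate a 1000 x 500 sparse matrix; cells not set remain zero.
        Matrix mat = new SparseMatrix(1000, 500);

        // Set a handful of non-zero cells.
        mat.set(0, 0, 1.0);
        mat.set(42, 7, 3.5);
        mat.set(999, 499, -2.0);

        // Read a single cell and a sparse view of a whole row.
        System.out.println(mat.get(42, 7));                  // 3.5
        Vector row = mat.viewRow(42);
        System.out.println(row.getNumNondefaultElements());  // 1
    }
}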
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void main(String[] args) throws IOException {
    //final Configuration conf = new Configuration();
    //final FileSystem fs = FileSystem.get(conf);
    //final SequenceFile.Reader reader = new SequenceFile.Reader(fs,
    //        new Path("R1.dat"), conf);
    //IntWritable key = new IntWritable();
    //VectorWritable vec = new VectorWritable();

    Matrix mat = new SparseMatrix(1500, 100);
    //SparseRealMatrix mat2 = new OpenMapRealMatrix(12419, 1500);
    BufferedReader reader = new BufferedReader(new FileReader("R.3.csv"));
    String line = null;
    while ((line = reader.readLine()) != null) {
        String[] parts = line.split(",");
        mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2]));
        /*
        Vector v = vec.get();
        int i = 0;
        Iterator<Vector.Element> itr = v.iterateNonZero();
        while (itr.hasNext()) {
            double elem = itr.next().get();
            if (elem != 0)
                mat2.setEntry(i, key.get(), elem);
            i++;
        }
        */
    }
    //mat = mat.transpose();
    System.out.println(mat.viewColumn(0).isDense());
    System.out.println(mat.viewRow(0).isDense());
    mat = mat.transpose();
    GramSchmidt.orthonormalizeColumns(mat);
    /*
    System.out.println("started QR");
    System.out.println(Runtime.getRuntime().maxMemory());
    System.out.println(Runtime.getRuntime().maxMemory() - Runtime.getRuntime().freeMemory());
    QRDecomposition qr = new QRDecomposition(mat2);
    System.out.println(qr.getQ().getColumnDimension());
    System.out.println(qr.getQ().getRowDimension());
    */
    //mat = mat.transpose();
    //storeSparseColumns(mat);
    //for (int i = 0; i < 10; i++) {
    //    System.out.println(mat.viewRow(i).getNumNondefaultElements());
    //}
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * If the MapDir matrix is small, we can convert it to an in-memory representation
 * and then run efficient centralized operations.
 *
 * @param origMtx in MapDir format (generated by MatrixOutputFormat)
 * @return a sparse in-memory matrix containing the data
 * @throws IOException
 */
static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols());
    Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
    while (sliceIterator.hasNext()) {
        MatrixSlice slice = sliceIterator.next();
        mtx.viewRow(slice.index()).assign(slice.vector());
    }
    mapDir.close();
    return mtx;
}
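Once the data sits in a plain in-memory SparseMatrix, centralized Mahout-math operations can be applied to it directly. The sketch below is a hedged illustration of that second step: it builds a small SparseMatrix by hand as a stand-in for the MapDir-loaded matrix (toSparseMatrix above is package-private and requires a Hadoop Configuration and row path) and computes the Gram matrix A^T * A centrally. The class name CentralizedOps and all dimensions and values are made up.

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;

public class CentralizedOps {
    public static void main(String[] args) {
        // Stand-in for a matrix that would come out of toSparseMatrix(...).
        Matrix mtx = new SparseMatrix(4, 3);
        mtx.set(0, 0, 2.0);
        mtx.set(1, 2, 1.5);
        mtx.set(3, 1, -0.5);

        // Centralized operation on the in-memory copy: Gram matrix A^T * A.
        Matrix gram = mtx.transpose().times(mtx);
        System.out.println(gram.numRows() + " x " + gram.numCols()); // 3 x 3
    }
}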
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Convert a 2-dimensional array to a sparse matrix in {@link MapDir} format.
 * @param vectors a 2-dimensional array of doubles
 * @param outPath the path to which the sparse matrix will be written
 * @param tmpPath an argument required to be passed to {@link DistributedRowMatrix}
 * @param label a unique label to name the output matrix directory
 * @return a {@link DistributedRowMatrix} pointing to the in-filesystem matrix
 * @throws Exception
 */
public static DistributedRowMatrix toSparseMapDir(double[][] vectors, Path outPath, Path tmpPath, String label)
        throws Exception {
    int nRows = vectors.length;
    int nCols = vectors[0].length;
    SparseMatrix m = new SparseMatrix(nRows, nCols);
    for (int r = 0; r < nRows; r++)
        m.set(r, vectors[r]);
    return AlgebraCommon.toMapDir(m, outPath, tmpPath, label);
}
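The in-memory half of that conversion (a double[][] copied into a SparseMatrix row by row with Matrix.set(row, double[])) can be exercised on its own, without the MapDir/HDFS step. A minimal sketch with made-up data; the class name ArrayToSparseMatrix is arbitrary:

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;

public class ArrayToSparseMatrix {
    public static void main(String[] args) {
        double[][] vectors = {
            {1.0, 0.0, 0.0},
            {0.0, 0.0, 2.0}
        };
        Matrix m = new SparseMatrix(vectors.length, vectors[0].length);
        // set(int row, double[] values) copies a whole row at once.
        for (int r = 0; r < vectors.length; r++) {
            m.set(r, vectors[r]);
        }
        System.out.println(m.get(1, 2)); // 2.0
    }
}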
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    // Compute the partial statistics of each cluster
    for (final Vector vector : points) {
        final int dimension = vector.size();
        Matrix outProd = null;
        if (isCovarianceDiagonal) {
            outProd = new SparseMatrix(dimension, dimension);
            for (int j = 0; j < dimension; j++) {
                outProd.set(j, j, vector.get(j) * vector.get(j));
            }
        } else {
            outProd = vector.cross(vector);
        }

        double denominator = 0;
        final double[] numerators = new double[numClusters];
        for (int i = 0; i < numClusters; i++) {
            final ClusterSummary clusterSummary = clusterSummaries.get(i);
            final Vector centroid = clusterSummary.getCentroid();
            final Matrix covariance = clusterSummary.getCovariance();
            final Double prior = clusterSummary.getPrior();

            final Vector differ = vector.minus(centroid);
            numerators[i] = prior / Math.sqrt(covariance.determinant())
                    * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2));
            denominator += numerators[i];
        }

        for (int i = 0; i < numClusters; i++) {
            final double posterior = denominator == 0 ? 1.0 / numerators.length : numerators[i] / denominator;
            if (!clusterToStats.containsKey(i)) {
                clusterToStats.put(i,
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            } else {
                clusterToStats.get(i).add(
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            }
        }
    }
}
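When the covariance is assumed diagonal, the example above stores only the diagonal of the outer product x x^T in a SparseMatrix rather than materializing the full dense cross product. A hedged, standalone sketch of just that trick, with a made-up vector and an arbitrary class name (DiagonalOuterProduct):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class DiagonalOuterProduct {
    public static void main(String[] args) {
        Vector x = new DenseVector(new double[] {1.0, -2.0, 0.5});
        int dimension = x.size();

        // Diagonal-only outer product: stores d entries instead of d * d.
        Matrix outProd = new SparseMatrix(dimension, dimension);
        for (int j = 0; j < dimension; j++) {
            outProd.set(j, j, x.get(j) * x.get(j));
        }

        // Full (dense) outer product for comparison.
        Matrix full = x.cross(x);
        System.out.println(outProd.get(1, 1) + " == " + full.get(1, 1)); // 4.0 == 4.0
    }
}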
From source file:edu.snu.dolphin.bsp.examples.ml.sub.ClusterSummaryListCodec.java
License:Apache License
@Override
public List<ClusterSummary> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<ClusterSummary> resultList = new ArrayList<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int numClusters = dais.readInt();
        final int dimension = dais.readInt();
        for (int i = 0; i < numClusters; i++) {
            final double prior = dais.readDouble();
            final Vector vector = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                vector.set(j, dais.readDouble());
            }
            Matrix matrix = null;
            if (isDiagonalCovariance) {
                matrix = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    matrix.set(j, j, dais.readDouble());
                }
            } else {
                matrix = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        matrix.set(j, k, dais.readDouble());
                    }
                }
            }
            resultList.add(new ClusterSummary(prior, vector, matrix));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultList;
}
From source file:edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntClusterStatsCodec.java
License:Apache License
@Override
public Map<Integer, ClusterStats> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, ClusterStats> resultMap = new HashMap<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        final int dimension = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int id = dais.readInt();
            final double probSum = dais.readDouble();
            final Vector pointSum = new DenseVector(dimension);
            for (int j = 0; j < dimension; j++) {
                pointSum.set(j, dais.readDouble());
            }
            Matrix outProdSum = null;
            if (diagonalCovariance) {
                outProdSum = new SparseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    outProdSum.set(j, j, dais.readDouble());
                }
            } else {
                outProdSum = new DenseMatrix(dimension, dimension);
                for (int j = 0; j < dimension; j++) {
                    for (int k = 0; k < dimension; k++) {
                        outProdSum.set(j, k, dais.readDouble());
                    }
                }
            }
            resultMap.put(id, new ClusterStats(outProdSum, pointSum, probSum));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultMap;
}
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {
    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text, VectorWritable> entry : new SequenceFileDirIterable<Text, VectorWritable>(
     *         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
     *     if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
     *         perlabelThetaNormalizer = entry.getSecond().get();
     *     }
     * }
     *
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}
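The weight matrix above is filled row by row with assignRow, which replaces an entire row of the SparseMatrix with a given vector. A hedged, purely in-memory sketch of that pattern, with made-up label/feature counts and no SequenceFile input; the class name AssignRowSketch is arbitrary:

import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;

public class AssignRowSketch {
    public static void main(String[] args) {
        int numLabels = 3;
        int numFeatures = 5;
        Matrix scoresPerLabelAndFeature = new SparseMatrix(numLabels, numFeatures);

        // Build a sparse row and install it wholesale as the row for label 1.
        Vector row = new RandomAccessSparseVector(numFeatures);
        row.set(0, 4.0);
        row.set(3, 1.0);
        scoresPerLabelAndFeature.assignRow(1, row);

        System.out.println(scoresPerLabelAndFeature.get(1, 0)); // 4.0
        System.out.println(scoresPerLabelAndFeature.get(1, 3)); // 1.0
    }
}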
From source file:org.qcri.pca.MahoutCompatibilityTest.java
License:Apache License
@Test
public void testMAHOUT_1221() {
    // create a matrix with an unassigned row 0
    Matrix matrix = new SparseMatrix(1, 1);
    Vector view = matrix.viewRow(0);
    final double value = 1.23;
    view.assign(value);
    // test whether the update in the view is reflected in the matrix
    assertEquals("Matrix value", view.getQuick(0), matrix.getQuick(0, 0), EPSILON);
}