List of usage examples for org.apache.mahout.math Matrix set
void set(int row, int column, double value); // overload used by most examples below; also: void set(String rowLabel, int row, double[] rowData);
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void main(String[] args) throws IOException { //final Configuration conf = new Configuration(); //final FileSystem fs = FileSystem.get(conf); //final SequenceFile.Reader reader = new SequenceFile.Reader(fs, // new Path("R1.dat"), conf); //IntWritable key = new IntWritable(); //VectorWritable vec = new VectorWritable(); Matrix mat = new SparseMatrix(1500, 100); //SparseRealMatrix mat2 = new OpenMapRealMatrix(12419,1500 ); BufferedReader reader = new BufferedReader(new FileReader("R.3.csv")); String line = null;//from w w w . j a va 2s . com while ((line = reader.readLine()) != null) { String[] parts = line.split(","); mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2])); /* Vector v = vec.get(); int i=0; Iterator<Vector.Element> itr = v.iterateNonZero(); while(itr.hasNext()){ double elem = itr.next().get(); if(elem !=0) mat2.setEntry(i, key.get(), elem); i++; } */ } //mat = mat.transpose(); System.out.println(mat.viewColumn(0).isDense()); System.out.println(mat.viewRow(0).isDense()); mat = mat.transpose(); GramSchmidt.orthonormalizeColumns(mat); /* System.out.println("started QR"); System.out.println(Runtime.getRuntime().maxMemory()); System.out.println(Runtime.getRuntime().maxMemory()-Runtime.getRuntime().freeMemory()); QRDecomposition qr = new QRDecomposition(mat2); System.out.println(qr.getQ().getColumnDimension()); System.out.println(qr.getQ().getRowDimension()); */ //mat = mat.transpose(); //storeSparseColumns(mat); //for (int i = 0; i < 10; i++) { // System.out.println(mat.viewRow(i).getNumNondefaultElements()); //} }
From source file:com.cloudera.knittingboar.messages.TestParameterVector.java
License:Apache License
public void testSerde() throws IOException { int classes = 20; int features = 10000; //GradientBuffer g = new GradientBuffer( classes, features ); Matrix m = new DenseMatrix(classes, features); //m.set(0, 0, 0.1); //m.set(0, 1, 0.3); //g.numFeatures(); for (int c = 0; c < classes - 1; c++) { for (int f = 0; f < features; f++) { m.set(c, f, (double) ((double) f / 10.0f)); }/*from w w w.j a v a2 s . c o m*/ } System.out.println("matrix created..."); ParameterVector vec_gradient = new ParameterVector(); vec_gradient.SrcWorkerPassCount = pass_count; vec_gradient.parameter_vector = m; vec_gradient.AvgLogLikelihood = -1.368f; vec_gradient.PercentCorrect = 72.68f; vec_gradient.TrainedRecords = 2500; assertEquals(10000, vec_gradient.numFeatures()); assertEquals(10000, vec_gradient.parameter_vector.columnSize()); assertEquals(20, vec_gradient.numCategories()); assertEquals(20, vec_gradient.parameter_vector.rowSize()); byte[] buf = vec_gradient.Serialize(); ParameterVector vec_gradient_deserialized = new ParameterVector(); vec_gradient_deserialized.Deserialize(buf); assertEquals(pass_count, vec_gradient_deserialized.SrcWorkerPassCount); assertEquals(0.1, vec_gradient_deserialized.parameter_vector.get(0, 1)); assertEquals(0.2, vec_gradient_deserialized.parameter_vector.get(0, 2)); assertEquals(0.3, vec_gradient_deserialized.parameter_vector.get(0, 3)); assertEquals(0.4, vec_gradient_deserialized.parameter_vector.get(0, 4)); assertEquals(0.5, vec_gradient_deserialized.parameter_vector.get(0, 5)); assertEquals(-1.368f, vec_gradient_deserialized.AvgLogLikelihood); assertEquals(72.68f, vec_gradient_deserialized.PercentCorrect); assertEquals(2500, vec_gradient_deserialized.TrainedRecords); }
From source file:com.cloudera.knittingboar.messages.TestParameterVectorGradient.java
License:Apache License
public void testSerde() throws IOException { int classes = 20; int features = 10000; //GradientBuffer g = new GradientBuffer( classes, features ); Matrix m = new DenseMatrix(classes, features); //m.set(0, 0, 0.1); //m.set(0, 1, 0.3); //g.numFeatures(); for (int c = 0; c < classes - 1; c++) { for (int f = 0; f < features; f++) { m.set(c, f, (double) ((double) f / 10.0f)); }//from ww w . j av a 2s.c om } System.out.println("matrix created..."); ParameterVectorGradient vec_gradient = new ParameterVectorGradient(); vec_gradient.SrcWorkerPassCount = pass_count; vec_gradient.parameter_vector = m; vec_gradient.AvgLogLikelihood = -1.368f; vec_gradient.PercentCorrect = 72.68f; vec_gradient.TrainedRecords = 2500; assertEquals(10000, vec_gradient.numFeatures()); assertEquals(10000, vec_gradient.parameter_vector.columnSize()); assertEquals(20, vec_gradient.numCategories()); assertEquals(20, vec_gradient.parameter_vector.rowSize()); byte[] buf = vec_gradient.Serialize(); ParameterVectorGradient vec_gradient_deserialized = new ParameterVectorGradient(); vec_gradient_deserialized.Deserialize(buf); assertEquals(pass_count, vec_gradient_deserialized.SrcWorkerPassCount); assertEquals(0.1, vec_gradient_deserialized.parameter_vector.get(0, 1)); assertEquals(0.2, vec_gradient_deserialized.parameter_vector.get(0, 2)); assertEquals(0.3, vec_gradient_deserialized.parameter_vector.get(0, 3)); assertEquals(0.4, vec_gradient_deserialized.parameter_vector.get(0, 4)); assertEquals(0.5, vec_gradient_deserialized.parameter_vector.get(0, 5)); assertEquals(-1.368f, vec_gradient_deserialized.AvgLogLikelihood); assertEquals(72.68f, vec_gradient_deserialized.PercentCorrect); assertEquals(2500, vec_gradient_deserialized.TrainedRecords); }
From source file:com.cloudera.knittingboar.sgd.TestGradientBuffer.java
License:Apache License
public void testAccumulateGradientMatrix() { GradientBuffer g0 = new GradientBuffer(2, 2); g0.setCell(0, 0, 0.4);//from w w w .j av a2 s . c o m g0.setCell(0, 1, 0.3); assertEquals(g0.numFeatures(), 2); /* GradientBuffer g1 = new GradientBuffer( 2, 2 ); g1.setCell(0, 0, 0.1); g1.setCell(0, 1, 0.3); assertEquals( g1.numFeatures(), 2 ); */ Matrix m = new DenseMatrix(2, 2); m.set(0, 0, 0.1); m.set(0, 1, 0.3); g0.AccumulateGradient(m); //m.get(arg0, arg1) // check source assertEquals(m.get(0, 0), 0.1); // check accumlation in g0 assertEquals(g0.getCell(0, 0), 0.5); // check source assertEquals(m.get(0, 1), 0.3); // check accumlation in g0 assertEquals(g0.getCell(0, 1), 0.6); System.out.println("matrix accumulation test done!"); assertNotNull(0); }
From source file:com.cloudera.knittingboar.sgd.TestGradientBuffer.java
License:Apache License
public void testAverageGradientBuffer() { System.out.println("testAverageGradientBuffer --------"); GradientBuffer g0 = new GradientBuffer(2, 2); g0.setCell(0, 0, 0.1d);// www . j av a 2 s . c o m g0.setCell(0, 1, 0.5d); assertEquals(g0.numFeatures(), 2); Matrix m = new DenseMatrix(2, 2); m.set(0, 0, 0.5d); m.set(0, 1, 0.1d); g0.AccumulateGradient(m); //m.get(arg0, arg1) // check source assertEquals(m.get(0, 0), 0.5d); // check accumlation in g0 //assertEquals( g0.getCell(0, 0), 0.6 ); junit.framework.Assert.assertEquals(0.6d, g0.getCell(0, 0), 0.0001); // check source assertEquals(m.get(0, 1), 0.1d); // check accumlation in g0 // assertEquals( g0.getCell(0, 1), 0.6 ); Utils.PrintVectorNonZero(g0.gamma.viewRow(0)); //Utils.PrintVectorNonZero(g0.gamma.viewRow(1)); g0.AverageAccumulations(2); Utils.PrintVectorNonZero(g0.gamma.viewRow(0)); System.out.println("matrix accumulation AVG test done!"); //System.out.println( "add test: " + ( 10.0d + 6.0d ) ); assertNotNull(0); }
From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java
License:Apache License
/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * Tests replacing the beta matrix (presumably pushed from the master node)
 * after the worker has already run a POLR training pass.
 *
 * @throws Exception on worker setup, split parsing, or compute failure
 */
public void testReplaceBetaMechanics() throws Exception {
    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);
    System.out.println("split count: " + splits.length);

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());

    // Parse the split descriptor string ("scheme:path:offset+length") to point
    // the record parser at the first split's file and byte range.
    // NOTE(review): assumes splits[0].toString() has exactly this shape — verify
    // against the InputSplit implementation in use.
    TextRecordParser txt_reader = new TextRecordParser();
    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);
    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);
    worker_model_builder.setRecordParser(txt_reader);

    // worker_model_builder.RunNextTrainingBatch();
    // Run one training pass before swapping beta.
    worker_model_builder.compute();

    // worker_model_builder.polr.Set

    // ------------------- now replace beta ------------
    // Fill row 0 of a replacement beta with a sentinel value so the swap is
    // observable below.
    double val1 = -1.0;
    // GradientBuffer g0 = new GradientBuffer( 2, worker_model_builder.FeatureVectorSize );
    Matrix m = new DenseMatrix(2, feature_vector_size);
    for (int x = 0; x < feature_vector_size; x++) {
        m.set(0, x, val1);
    }

    worker_model_builder.polr.SetBeta(m);

    // Every cell of row 0 of the live beta must now be the sentinel.
    for (int x = 0; x < feature_vector_size; x++) {
        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);
    }

    System.out.println("--------------------------------\n");
}
From source file:com.skp.experiment.common.MathHelper.java
License:Apache License
/** * read a {@link Matrix} from a SequenceFile<IntWritable,VectorWritable> *///from w ww .j a v a 2s . co m public static Matrix readMatrix(Configuration conf, Path path, int rows, int columns) { boolean readOneRow = false; Matrix matrix = new DenseMatrix(rows, columns); for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(path, true, conf)) { IntWritable key = record.getFirst(); VectorWritable value = record.getSecond(); readOneRow = true; int row = key.get(); Iterator<Vector.Element> elementsIterator = value.get().iterateNonZero(); while (elementsIterator.hasNext()) { Vector.Element element = elementsIterator.next(); matrix.set(row, element.index(), element.get()); } } if (!readOneRow) { throw new IllegalStateException("Not a single row read!"); } return matrix; }
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
/**
 * One EM iteration over the local points: for each point, computes its outer
 * product and its per-cluster responsibility weight, then accumulates weighted
 * partial statistics into {@code clusterToStats}.
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    // Compute the partial statistics of each cluster
    for (final Vector vector : points) {
        final int dimension = vector.size();
        Matrix outProd = null;

        if (isCovarianceDiagonal) {
            // Diagonal covariance: only the squared components are needed, so a
            // sparse diagonal matrix avoids the full dense outer product.
            outProd = new SparseMatrix(dimension, dimension);
            for (int j = 0; j < dimension; j++) {
                outProd.set(j, j, vector.get(j) * vector.get(j));
            }
        } else {
            // Full covariance: dense outer product vector * vector^T.
            outProd = vector.cross(vector);
        }

        double denominator = 0;
        final double[] numerators = new double[numClusters];
        for (int i = 0; i < numClusters; i++) {
            final ClusterSummary clusterSummary = clusterSummaries.get(i);
            final Vector centroid = clusterSummary.getCentroid();
            final Matrix covariance = clusterSummary.getCovariance();
            final Double prior = clusterSummary.getPrior();

            // Unnormalized cluster weight for this point:
            // prior / sqrt(det(cov)) * exp(d^T cov^-1 d / -2), d = point - centroid.
            // NOTE(review): this looks like a Gaussian density without the
            // (2*pi)^(dim/2) factor — it cancels in the posterior ratio below.
            final Vector differ = vector.minus(centroid);
            numerators[i] = prior / Math.sqrt(covariance.determinant())
                    * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2));
            denominator += numerators[i];
        }

        for (int i = 0; i < numClusters; i++) {
            // Responsibility of cluster i for this point; if all numerators are 0
            // (numerical underflow / degenerate covariance), fall back to uniform.
            final double posterior = denominator == 0 ? 1.0 / numerators.length
                    : numerators[i] / denominator;
            if (!clusterToStats.containsKey(i)) {
                clusterToStats.put(i, new ClusterStats(times(outProd, posterior),
                        vector.times(posterior), posterior, false));
            } else {
                clusterToStats.get(i).add(new ClusterStats(times(outProd, posterior),
                        vector.times(posterior), posterior, false));
            }
        }
    }
}
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
/** * Return a new matrix containing the product of each value of the recipient and the argument. * This method exploits sparsity of the matrix, that is, considers only non-zero entries. */// w ww . j a va2 s . c o m private Matrix times(final Matrix matrix, final double scala) { final Matrix result = matrix.clone(); final Iterator<MatrixSlice> sliceIterator = matrix.iterator(); while (sliceIterator.hasNext()) { final MatrixSlice slice = sliceIterator.next(); final int row = slice.index(); for (final Vector.Element e : slice.nonZeroes()) { final int col = e.index(); result.set(row, col, e.get() * scala); } } return result; }
From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java
License:Apache License
/**
 * Compute the covariance matrix from the accumulated statistics:
 * cov(row, col) = E[x_row * x_col] - mean(row) * mean(col), where the
 * expectation is outProdSum / probSum.
 *
 * NOTE(review): only the NON-ZERO entries of outProdSum are adjusted by the
 * mean term; zero entries keep their cloned value (0). This presumably relies
 * on the sparsity structure (e.g. diagonal covariance) implying the mean
 * product is irrelevant there — confirm against how outProdSum is built.
 *
 * @return the covariance matrix derived from {@code outProdSum} and {@code probSum}
 */
public Matrix computeCovariance() {
    final Vector mean = computeMean();
    final Matrix covariance = outProdSum.clone();

    final Iterator<MatrixSlice> sliceIterator = outProdSum.iterator();
    while (sliceIterator.hasNext()) {
        final MatrixSlice slice = sliceIterator.next();
        final int row = slice.index();
        for (final Vector.Element e : slice.nonZeroes()) {
            final int col = e.index();
            final double squaredSum = e.get();
            // Normalize the accumulated product by total probability mass, then
            // subtract the product of means.
            covariance.set(row, col, squaredSum / probSum - mean.get(row) * mean.get(col));
        }
    }
    return covariance;
}