List of usage examples for org.apache.mahout.math Matrix times
Matrix times(Matrix x);
From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java
License:Apache License
public static Vector solve(Iterable<Vector> featureVectors, Vector ratingVector, double lambda, int numFeatures) { Preconditions.checkNotNull(featureVectors, "Feature Vectors cannot be null"); Preconditions.checkArgument(!Iterables.isEmpty(featureVectors)); Preconditions.checkNotNull(ratingVector, "Rating Vector cannot be null"); Preconditions.checkArgument(ratingVector.getNumNondefaultElements() > 0, "Rating Vector cannot be empty"); Preconditions.checkArgument(Iterables.size(featureVectors) == ratingVector.getNumNondefaultElements()); int nui = ratingVector.getNumNondefaultElements(); Matrix MiIi = createMiIi(featureVectors, numFeatures); Matrix RiIiMaybeTransposed = createRiIiMaybeTransposed(ratingVector); /* compute Ai = MiIi * t(MiIi) + lambda * nui * E */ Matrix Ai = miTimesMiTransposePlusLambdaTimesNuiTimesE(MiIi, lambda, nui); /* compute Vi = MiIi * t(R(i,Ii)) */ Matrix Vi = MiIi.times(RiIiMaybeTransposed); /* compute Ai * ui = Vi */ return solve(Ai, Vi); }
From source file:com.mapr.stats.bandit.ContextualBayesBanditTest.java
License:Apache License
@Test public void testConvergence() { final Random rand = RandomUtils.getRandom(); Matrix recipes = new DenseMatrix(100, 10).assign(new DoubleFunction() { @Override//w w w .j av a 2 s.c o m public double apply(double arg1) { return rand.nextDouble() < 0.2 ? 1 : 0; } }); recipes.viewColumn(9).assign(1); Vector actualWeights = new DenseVector(new double[] { 1, 0.25, -0.25, 0, 0, 0, 0, 0, 0, -1 }); Vector probs = recipes.times(actualWeights); ContextualBayesBandit banditry = new ContextualBayesBandit(recipes); for (int i = 0; i < 1000; i++) { int k = banditry.sample(); final boolean success = rand.nextDouble() < probs.get(k); banditry.train(k, success); } }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
/** * small integration test that runs the full job * * <pre>//from w ww .j ava 2s.c o m * * user-item-matrix * * burger hotdog berries icecream * dog 5 5 2 - * rabbit 2 - 3 5 * cow - 5 - 3 * donkey 3 - - 5 * * </pre> */ @Test public void completeJobToyExample() throws Exception { Double na = Double.NaN; Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, na }), new DenseVector(new double[] { 2.0, na, 3.0, 5.0 }), new DenseVector(new double[] { na, 5.0, na, 3.0 }), new DenseVector(new double[] { 3.0, na, na, 5.0 }) }); writeLines(inputFile, preferencesAsText(preferences)); indexSizeFile.deleteOnExit(); writeLines(indexSizeFile, "0,4\n1,4"); ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf); int numFeatures = 3; int numIterations = 5; double lambda = 0.065; alsFactorization .run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform", "false" }); Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"), preferences.numRows(), numFeatures); Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"), preferences.numCols(), numFeatures); StringBuilder info = new StringBuilder(); info.append("\nA - users x items\n\n"); info.append(MathHelper.nice(preferences)); info.append("\nU - users x features\n\n"); info.append(MathHelper.nice(u)); info.append("\nM - items x features\n\n"); info.append(MathHelper.nice(m)); Matrix Ak = u.times(m.transpose()); info.append("\nAk - users x items\n\n"); info.append(MathHelper.nice(Ak)); info.append('\n'); log.info(info.toString()); RunningAverage avg = new FullRunningAverage(); Iterator<MatrixSlice> sliceIterator = preferences.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero(); while (elementIterator.hasNext()) { Vector.Element e = elementIterator.next(); if (!Double.isNaN(e.get())) { double pref = e.get(); double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index())); double err = pref - estimate; avg.addDatum(err * err); log.info("Comparing preference of user [{}] towards item [{}], was [{}] estimate is [{}]", new Object[] { slice.index(), e.index(), pref, estimate }); } } } double rmse = Math.sqrt(avg.getAverage()); log.info("RMSE: {}", rmse); assertTrue(rmse < 0.2); }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJobTest.java
License:Apache License
@Test public void completeJobImplicitToyExample() throws Exception { Matrix observations = new SparseRowMatrix(4, 4, new Vector[] { new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }), new DenseVector(new double[] { 2.0, 0, 3.0, 5.0 }), new DenseVector(new double[] { 0, 5.0, 0, 3.0 }), new DenseVector(new double[] { 3.0, 0, 0, 5.0 }) }); Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] { new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }), new DenseVector(new double[] { 1.0, 0, 1.0, 1.0 }), new DenseVector(new double[] { 0, 1.0, 0, 1.0 }), new DenseVector(new double[] { 1.0, 0, 0, 1.0 }) }); writeLines(inputFile, preferencesAsText(observations)); writeLines(indexSizeFile, "0,4\n1,4"); ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf);/*from ww w . j a va 2 s . co m*/ int numFeatures = 3; int numIterations = 5; double lambda = 0.065; double alpha = 20; alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--implicitFeedback", String.valueOf(true), "--alpha", String.valueOf(alpha), "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations), "--indexSizes", indexSizeFile.toString(), "--useTransform", "false" }); Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"), observations.numRows(), numFeatures); Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"), observations.numCols(), numFeatures); StringBuilder info = new StringBuilder(); info.append("\nObservations - users x items\n"); info.append(MathHelper.nice(observations)); info.append("\nA - users x items\n\n"); info.append(MathHelper.nice(preferences)); info.append("\nU - users x features\n\n"); info.append(MathHelper.nice(u)); info.append("\nM - items x features\n\n"); info.append(MathHelper.nice(m)); Matrix Ak = u.times(m.transpose()); info.append("\nAk - users x items\n\n"); info.append(MathHelper.nice(Ak)); info.append('\n'); log.info(info.toString()); RunningAverage avg = new FullRunningAverage(); Iterator<MatrixSlice> sliceIterator = preferences.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); for (Vector.Element e : slice.vector()) { if (!Double.isNaN(e.get())) { double pref = e.get(); double estimate = u.viewRow(slice.index()).dot(m.viewRow(e.index())); double confidence = 1 + alpha * observations.getQuick(slice.index(), e.index()); double err = confidence * (pref - estimate) * (pref - estimate); avg.addDatum(err); log.info( "Comparing preference of user [{}] towards item [{}], was [{}] with confidence [{}] " + "estimate is [{}]", new Object[] { slice.index(), e.index(), pref, confidence, estimate }); } } } double rmse = Math.sqrt(avg.getAverage()); log.info("RMSE: {}", rmse); assertTrue(rmse < 0.4); }
From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresSolver.java
License:Apache License
private Vector solve(Iterable<Vector> featureVectors, Vector ratingVector, double lambda, int numFeatures) { Preconditions.checkNotNull(featureVectors, "Feature vectors cannot be null"); Preconditions.checkArgument(!Iterables.isEmpty(featureVectors)); Preconditions.checkNotNull(ratingVector, "rating vector cannot be null"); Preconditions.checkArgument(ratingVector.getNumNondefaultElements() > 0, "Rating vector cannot be empty"); Preconditions.checkArgument(Iterables.size(featureVectors) == ratingVector.getNumNondefaultElements()); int nui = ratingVector.getNumNondefaultElements(); Matrix MiIi = createMiIi(featureVectors, numFeatures); Matrix RiIiMaybeTransposed = createRiIiMaybeTransposed(ratingVector); /* compute Ai = MiIi * t(MiIi) + lambda * nui * E */ Matrix Ai = addLambdaTimesNuiTimesE(MiIi.times(MiIi.transpose()), lambda, nui); /* compute Vi = MiIi * t(R(i,Ii)) */ Matrix Vi = MiIi.times(RiIiMaybeTransposed); /* compute Ai * ui = Vi */ return solve(Ai, Vi); }
From source file:org.mpei.knn.lsh.tools.HashedVector.java
License:Apache License
public static int computeHash(Vector v, Matrix projection) { int hash = 0; for (Element element : projection.times(v)) { if (element.get() > 0) { hash += 1 << element.index(); }/*ww w . ja va 2 s . c o m*/ } return hash; }
From source file:org.mpei.knn.lsh.tools.HashedVector.java
License:Apache License
public static long computeHash64(Vector v, Matrix projection) { long hash = 0; for (Element element : projection.times(v)) { if (element.get() > 0) { hash += 1L << element.index(); }/*w w w. ja v a 2 s. com*/ } return hash; }
From source file:org.qcri.pca.SPCADriver.java
/** * Run sPCA//from w ww .j a v a 2 s . c o m * * @param conf * the configuration * @param input * the path to the input matrix Y * @param output * the path to the output (currently for normalization output) * @param nRows * number of rows in input matrix * @param nCols * number of columns in input matrix * @param nPCs * number of desired principal components * @param splitFactor * divide the block size by this number to increase parallelism * @param round * the initial round index, used for naming each round output * @param LAST_ROUND * the index of the last round * @param sampleRate * if < 1, the input is sampled during normalization * @return the error * @throws Exception */ double runMapReduce(Configuration conf, DistributedRowMatrix distY, InitialValues initVal, Path output, final int nRows, final int nCols, final int nPCs, final int splitFactor, final float errSampleRate, final int LAST_ROUND, final int normalize) throws Exception { int round = 0; //The two PPCA variables that improve over each iteration double ss = initVal.ss; Matrix centralC = initVal.C; //initial CtC Matrix centralCtC = centralC.transpose().times(centralC); final float threshold = 0.00001f; int sampleRate = 1; //1. compute mean and span DenseVector ym = new DenseVector(distY.numCols()); //ym=mean(distY) MeanAndSpanJob masJob = new MeanAndSpanJob(); boolean normalizeMean = false; if (normalize == 1) normalizeMean = true; Path meanSpanPath = masJob.compuateMeanAndSpan(distY.getRowPath(), output, ym, normalizeMean, conf, "" + round + "-init"); Path normalizedYPath = null; //2. normalize the input matrix Y if (normalize == 1) { NormalizeJob normalizeJob = new NormalizeJob(); normalizedYPath = normalizeJob.normalize(conf, distY.getRowPath(), meanSpanPath, output, sampleRate, "" + round + "-init"); distY = new DistributedRowMatrix(normalizedYPath, getTempPath(), nRows, nCols); distY.setConf(conf); //After normalization, set the split factor if (splitFactor > 1) { FileSystem fss = FileSystem.get(normalizedYPath.toUri(), conf); long blockSize = fss.getDefaultBlockSize() / splitFactor; conf.set("mapred.max.split.size", Long.toString(blockSize)); } } if (normalizedYPath == null) normalizedYPath = distY.getRowPath(); //3. compute the 2-norm of Y Norm2Job normJob = new Norm2Job(); double norm2 = normJob.computeFNorm(conf, normalizedYPath, meanSpanPath, getTempPath(), "" + round + "-init"); if (sampleRate < 1) { // rescale norm2 = norm2 / sampleRate; } DenseVector xm = new DenseVector(nPCs); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); DistributedRowMatrix distY2X = null; DistributedRowMatrix distC = null; double prevObjective = Double.MAX_VALUE; double error = 0; double relChangeInObjective = Double.MAX_VALUE; double prevError = Double.MAX_VALUE; for (; (round < LAST_ROUND && relChangeInObjective > threshold && prevError > 0.02); round++) { // Sx = inv( ss * eye(d) + CtC ); Matrix centralSx = centralCtC.clone(); centralSx.viewDiagonal().assign(Functions.plus(ss)); centralSx = inv(centralSx); // X = Y * C * Sx' => Y2X = C * Sx' Matrix centralY2X = centralC.times(centralSx.transpose()); distY2X = PCACommon.toDistributedRowMatrix(centralY2X, getTempPath(), getTempPath(), "CSxt" + round); // Xm = Ym * Y2X PCACommon.denseVectorTimesMatrix(ym, centralY2X, xm); // We skip computing X as we generate it on demand using Y and Y2X //Compute X'X and Y'X CompositeJob compositeJob = new CompositeJob(); compositeJob.computeYtXandXtX(distY, distY2X, ym, xm, getTempPath(), conf, "" + round); Matrix centralXtX = compositeJob.xtx; Matrix centralYtX = compositeJob.ytx; if (sampleRate < 1) { // rescale centralXtX.assign(Functions.div(sampleRate)); centralYtX.assign(Functions.div(sampleRate)); } // XtX = X'*X + ss * Sx final double finalss = ss; centralXtX.assign(centralSx, new DoubleDoubleFunction() { @Override public double apply(double arg1, double arg2) { return arg1 + finalss * arg2; } }); // C = (Ye'*X) / SumXtX; Matrix invXtX_central = inv(centralXtX); centralC = centralYtX.times(invXtX_central); distC = PCACommon.toDistributedRowMatrix(centralC, getTempPath(), getTempPath(), "C" + round); centralCtC = centralC.transpose().times(centralC); // Compute new value for ss // ss = ( sum(sum(Ye.^2)) + PCACommon.trace(XtX*CtC) - 2sum(XiCtYit) ) // /(N*D); double ss2 = PCACommon.trace(centralXtX.times(centralCtC)); VarianceJob varianceJob = new VarianceJob(); double xctyt = varianceJob.computeVariance(distY, ym, distY2X, xm, distC, getTempPath(), conf, "" + round); if (sampleRate < 1) { // rescale xctyt = xctyt / sampleRate; } ss = (norm2 + ss2 - 2 * xctyt) / (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss + " (" + norm2 + " + " + ss2 + " -2* " + xctyt); double traceSx = PCACommon.trace(centralSx); double traceXtX = PCACommon.trace(centralXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceXtX + " " + traceC + " " + traceCtC); double objective = ss; relChangeInObjective = Math.abs(1 - objective / prevObjective); prevObjective = objective; log.info("Objective: %.6f relative change: %.6f \n", objective, relChangeInObjective); if (!CALCULATE_ERR_ATTHEEND) { log.info("Computing the error at round " + round + " ..."); ReconstructionErrJob errJob = new ReconstructionErrJob(); error = errJob.reconstructionErr(distY, distY2X, distC, centralC, ym, xm, errSampleRate, conf, getTempPath(), "" + round); log.info("... end of computing the error at round " + round); prevError = error; } } if (CALCULATE_ERR_ATTHEEND) { log.info("Computing the error at round " + round + " ..."); ReconstructionErrJob errJob = new ReconstructionErrJob(); error = errJob.reconstructionErr(distY, distY2X, distC, centralC, ym, xm, errSampleRate, conf, getTempPath(), "" + round); log.info("... end of computing the error at round " + round); } initVal.C = centralC; initVal.ss = ss; writeMatrix(initVal.C, output, getTempPath(), "PCs"); return error; }
From source file:org.qcri.pca.SPCADriver.java
/** * Run PPCA sequentially given the small input Y which fit into memory This * could be used also on sampled data from a distributed matrix * //w w w .j a v a 2 s .com * Note: this implementation ignore NaN values by replacing them with 0 * * @param conf * the configuration * @param centralY * the input matrix * @param initVal * the initial values for C and ss * @param MAX_ROUNDS * maximum number of iterations * @return the error * @throws Exception */ double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS) throws Exception { Matrix centralC = initVal.C; double ss = initVal.ss; final int nRows = centralY.numRows(); final int nCols = centralY.numCols(); final int nPCs = centralC.numCols(); final float threshold = 0.00001f; log.info("tracec= " + PCACommon.trace(centralC)); //ignore NaN elements by replacing them with 0 for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) if (new Double(centralY.getQuick(r, c)).isNaN()) { centralY.setQuick(r, c, 0); } //centralize and normalize the input matrix Vector mean = centralY.aggregateColumns(new VectorFunction() { @Override public double apply(Vector v) { return v.zSum() / nRows; } }); //also normalize the matrix by dividing each element by its columns range Vector spanVector = new DenseVector(nCols); for (int c = 0; c < nCols; c++) { Vector col = centralY.viewColumn(c); double max = col.maxValue(); double min = col.minValue(); double span = max - min; spanVector.setQuick(c, span); } for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1)); Matrix centralCtC = centralC.transpose().times(centralC); log.info("tracectc= " + PCACommon.trace(centralCtC)); log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC))); log.info("traceye= " + PCACommon.trace(centralY)); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); int count = 1; // old = Inf; double old = Double.MAX_VALUE; // -------------------------- EM Iterations // while count Matrix centralX = null; int round = 0; while (round < MAX_ROUNDS && count > 0) { round++; // Sx = inv( eye(d) + CtC/ss ); Matrix Sx = eye(nPCs).times(ss).plus(centralCtC); Sx = inv(Sx); // X = Ye*C*(Sx/ss); centralX = centralY.times(centralC).times(Sx.transpose()); // XtX = X'*X + ss * Sx; Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss)); // C = (Ye'*X) / XtX; Matrix tmpInv = inv(centralXtX); centralC = centralY.transpose().times(centralX).times(tmpInv); // CtC = C'*C; centralCtC = centralC.transpose().times(centralC); // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D); double norm2 = centralY.clone().assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum(); ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC)); //ss3 = sum (X(i:0) * C' * Y(i,:)') DenseVector resVector = new DenseVector(nCols); double xctyt = 0; for (int i = 0; i < nRows; i++) { PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector); double res = resVector.dot(centralY.viewRow(i)); xctyt += res; } ss -= 2 * xctyt; ss /= (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); double traceSx = PCACommon.trace(Sx); double traceX = PCACommon.trace(centralX); double traceSumXtX = PCACommon.trace(centralXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0); double objective = ss; double rel_ch = Math.abs(1 - objective / old); old = objective; count++; if (rel_ch < threshold && count > 5) count = 0; log.info("Objective: %.6f relative change: %.6f \n", objective, rel_ch); } double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of Ye is: " + norm1Y); Matrix newYerror = centralY.minus(centralX.times(centralC.transpose())); double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of the reconstruction error is: " + norm1Err); initVal.C = centralC; initVal.ss = ss; return norm1Err / norm1Y; }
From source file:org.qcri.pca.SPCADriver.java
/** * Run PPCA sequentially given the small input Y which fit into memory This * could be used also on sampled data from a distributed matrix * //from w w w . ja v a 2 s . co m * Note: this implementation ignore NaN values by replacing them with 0 * * @param conf * the configuration * @param centralY * the input matrix * @param initVal * the initial values for C and ss * @param MAX_ROUNDS * maximum number of iterations * @return the error * @throws Exception */ double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS) { Matrix centralC = initVal.C;// the current implementation doesn't use initial ss of // initVal final int nRows = centralY.numRows(); final int nCols = centralY.numCols(); final int nPCs = centralC.numCols(); final float threshold = 0.00001f; log.info("tracec= " + PCACommon.trace(centralC)); // Y = Y - mean(Ye) // Also normalize the matrix for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) if (new Double(centralY.getQuick(r, c)).isNaN()) { centralY.setQuick(r, c, 0); } Vector mean = centralY.aggregateColumns(new VectorFunction() { @Override public double apply(Vector v) { return v.zSum() / nRows; } }); Vector spanVector = new DenseVector(nCols); for (int c = 0; c < nCols; c++) { Vector col = centralY.viewColumn(c); double max = col.maxValue(); double min = col.minValue(); double span = max - min; spanVector.setQuick(c, span); } for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1)); // -------------------------- initialization // CtC = C'*C; Matrix centralCtC = centralC.transpose().times(centralC); log.info("tracectc= " + PCACommon.trace(centralCtC)); log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC))); log.info("traceye= " + PCACommon.trace(centralY)); // X = Ye * C * inv(CtC); Matrix centralX = centralY.times(centralC).times(inv(centralCtC)); log.info("tracex= " + PCACommon.trace(centralX)); // recon = X * C'; Matrix recon = centralX.times(centralC.transpose()); log.info("tracerec= " + PCACommon.trace(recon)); // ss = sum(sum((recon-Ye).^2)) / (N*D-missing); double ss = recon.minus(centralY).assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum() / (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); int count = 1; // old = Inf; double old = Double.MAX_VALUE; // -------------------------- EM Iterations // while count int round = 0; while (round < MAX_ROUNDS && count > 0) { round++; // ------------------ E-step, (co)variances // Sx = inv( eye(d) + CtC/ss ); Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss)); centralSx = inv(centralSx); // ------------------ E-step expected value // X = Ye*C*(Sx/ss); centralX = centralY.times(centralC).times(centralSx.divide(ss)); // ------------------ M-step // SumXtX = X'*X; Matrix centralSumXtX = centralX.transpose().times(centralX); // C = (Ye'*X) / (SumXtX + N*Sx ); Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows))); centralC = centralY.transpose().times(centralX).times(tmpInv); // CtC = C'*C; centralCtC = centralC.transpose().times(centralC); // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) + // missing*ss_old ) /(N*D); recon = centralX.times(centralC.transpose()); double error = recon.minus(centralY).assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum(); ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum(); ss /= (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); double traceSx = PCACommon.trace(centralSx); double traceX = PCACommon.trace(centralX); double traceSumXtX = PCACommon.trace(centralSumXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0); // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) ) // +PCACommon.trace(SumXtX) -missing*log(ss_old); double objective = nRows * nCols + nRows * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant())) + PCACommon.trace(centralSumXtX); double rel_ch = Math.abs(1 - objective / old); old = objective; count++; if (rel_ch < threshold && count > 5) count = 0; System.out.printf("Objective: %.6f relative change: %.6f \n", objective, rel_ch); } double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of Y is: " + norm1Y); Matrix newYerror = centralY.minus(centralX.times(centralC.transpose())); double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of the reconstruction error is: " + norm1Err); initVal.C = centralC; initVal.ss = ss; return norm1Err / norm1Y; }