Example usage for org.apache.mahout.math Matrix clone

List of usage examples for org.apache.mahout.math Matrix clone

Introduction

On this page you can find example usage of org.apache.mahout.math Matrix clone.

Prototype

Matrix clone();

Source Link

Document

Return a copy of the recipient

Usage

From source file:com.cloudera.knittingboar.sgd.ParallelOnlineLogisticRegression.java

License:Apache License

/**
 * Replaces this model's beta matrix with a defensive copy of the supplied
 * matrix, so later mutations of the caller's matrix do not affect this
 * regression instance.
 */
public void SetBeta(Matrix beta_mstr_cpy) {
    final Matrix betaCopy = beta_mstr_cpy.clone();
    this.beta = betaCopy;
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Returns a new matrix containing the product of each value of the recipient and the argument.
 * This method exploits sparsity of the matrix, that is, it considers only non-zero entries.
 */
private Matrix times(final Matrix matrix, final double scala) {
    // Start from a copy; zero entries stay zero, so only the stored
    // (non-zero) entries need to be rewritten with their scaled value.
    final Matrix scaled = matrix.clone();
    for (final Iterator<MatrixSlice> slices = matrix.iterator(); slices.hasNext();) {
        final MatrixSlice rowSlice = slices.next();
        final int rowIndex = rowSlice.index();
        for (final Vector.Element entry : rowSlice.nonZeroes()) {
            scaled.set(rowIndex, entry.index(), entry.get() * scala);
        }
    }
    return scaled;
}

From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java

License:Apache License

/**
 * Selects whether to store deep copies of {@code outProdSum} and {@code pointSum},
 * or just references to them.
 * @param outProdSum sum of outer products
 * @param pointSum sum of data points
 * @param probSum sum of probabilities
 * @param isDeepCopy whether to deep-copy the matrix and vector arguments
 */
public ClusterStats(final Matrix outProdSum, final Vector pointSum, final double probSum,
        final boolean isDeepCopy) {
    // Deep copy decouples this object from the caller's instances;
    // otherwise both share (and may mutate) the same matrix/vector.
    this.outProdSum = isDeepCopy ? outProdSum.clone() : outProdSum;
    this.pointSum = isDeepCopy ? pointSum.clone() : pointSum;
    this.probSum = probSum;
}

From source file:org.qcri.pca.PCATest.java

License:Apache License

@Test
public void crossTestIterationOfMapReducePPCASequentialPPCA() throws Exception {
    // Both runs must start from identical initial values; C is cloned so
    // neither implementation mutates the other's starting matrix.
    Matrix initialC = PCACommon.randomMatrix(D, d);
    double initialSs = PCACommon.randSS();
    InitialValues seqInit = new InitialValues(initialC, initialSs);
    InitialValues mrInit = new InitialValues(initialC.clone(), initialSs);

    // 1. run the sequential version on the in-memory copy of the input
    Matrix yCentral = new DenseMatrix(N, D);
    int rowIndex = 0;
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null,
            conf)) {
        yCentral.assignRow(rowIndex, vw.get());
        rowIndex++;
    }
    double seqErr = ppcaDriver.runSequential(conf, yCentral, seqInit, 1);

    // 2. run the mapreduce version on the same input
    DistributedRowMatrix distY = new DistributedRowMatrix(input, tmp, N, D);
    distY.setConf(conf);
    double mrErr = ppcaDriver.runMapReduce(conf, distY, mrInit, output, N, D, d, 1, 1, 1, 1);

    // After one iteration from the same starting point, ss, trace(C) and the
    // reconstruction error must agree between the two implementations.
    Assert.assertEquals("ss value is different in sequential and mapreduce PCA", seqInit.ss, mrInit.ss,
            EPSILON);
    double seqCTrace = PCACommon.trace(seqInit.C);
    double mrCTrace = PCACommon.trace(mrInit.C);
    Assert.assertEquals("C value is different in sequential and mapreduce PCA", seqCTrace, mrCTrace, EPSILON);
    Assert.assertEquals("The PPCA error between sequntial and mapreduce methods is too different: "
            + seqErr + "!= " + mrErr, seqErr, mrErr, EPSILON);
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run sPCA (the distributed MapReduce EM loop).
 * 
 * @param conf
 *          the configuration
 * @param distY
 *          the distributed input matrix Y
 * @param initVal
 *          the initial values for C and ss; overwritten with the final estimates
 * @param output
 *          the path to the output (currently for normalization output)
 * @param nRows
 *          number of rows in input matrix
 * @param nCols
 *          number of columns in input matrix
 * @param nPCs
 *          number of desired principal components
 * @param splitFactor
 *          divide the block size by this number to increase parallelism
 * @param errSampleRate
 *          the sampling rate used when computing the reconstruction error
 * @param LAST_ROUND
 *          the index of the last round
 * @param normalize
 *          if 1, the input is normalized before the EM iterations
 * @return the error
 * @throws Exception
 */
double runMapReduce(Configuration conf, DistributedRowMatrix distY, InitialValues initVal, Path output,
        final int nRows, final int nCols, final int nPCs, final int splitFactor, final float errSampleRate,
        final int LAST_ROUND, final int normalize) throws Exception {
    int round = 0;
    // The two PPCA variables that improve over each iteration
    double ss = initVal.ss;
    Matrix centralC = initVal.C;
    // initial CtC
    Matrix centralCtC = centralC.transpose().times(centralC);
    final float threshold = 0.00001f;
    // NOTE(review): sampleRate is fixed at 1, so every "sampleRate < 1"
    // rescaling branch below is currently dead code kept for future sampling.
    int sampleRate = 1;
    // 1. compute mean and span of the columns of Y
    DenseVector ym = new DenseVector(distY.numCols()); // ym = mean(distY)
    MeanAndSpanJob masJob = new MeanAndSpanJob();
    boolean normalizeMean = (normalize == 1);
    Path meanSpanPath = masJob.compuateMeanAndSpan(distY.getRowPath(), output, ym, normalizeMean, conf,
            "" + round + "-init");
    Path normalizedYPath = null;

    // 2. normalize the input matrix Y
    if (normalize == 1) {

        NormalizeJob normalizeJob = new NormalizeJob();
        normalizedYPath = normalizeJob.normalize(conf, distY.getRowPath(), meanSpanPath, output, sampleRate,
                "" + round + "-init");
        distY = new DistributedRowMatrix(normalizedYPath, getTempPath(), nRows, nCols);
        distY.setConf(conf);
        // After normalization, shrink the split size to increase parallelism
        if (splitFactor > 1) {
            FileSystem fss = FileSystem.get(normalizedYPath.toUri(), conf);
            long blockSize = fss.getDefaultBlockSize() / splitFactor;
            conf.set("mapred.max.split.size", Long.toString(blockSize));
        }
    }
    if (normalizedYPath == null)
        normalizedYPath = distY.getRowPath();

    // 3. compute the 2-norm (sum of squares) of Y
    Norm2Job normJob = new Norm2Job();
    double norm2 = normJob.computeFNorm(conf, normalizedYPath, meanSpanPath, getTempPath(),
            "" + round + "-init");
    if (sampleRate < 1) { // rescale
        norm2 = norm2 / sampleRate;
    }

    DenseVector xm = new DenseVector(nPCs);
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
    DistributedRowMatrix distY2X = null;
    DistributedRowMatrix distC = null;
    double prevObjective = Double.MAX_VALUE;
    double error = 0;
    double relChangeInObjective = Double.MAX_VALUE;
    double prevError = Double.MAX_VALUE;
    // EM loop: stop at LAST_ROUND, or when the objective stops changing, or
    // when the reconstruction error drops to 0.02 or below.
    for (; (round < LAST_ROUND && relChangeInObjective > threshold && prevError > 0.02); round++) {
        // Sx = inv( ss * eye(d) + CtC );
        Matrix centralSx = centralCtC.clone();
        centralSx.viewDiagonal().assign(Functions.plus(ss));
        centralSx = inv(centralSx);
        // X = Y * C * Sx' => Y2X = C * Sx'
        Matrix centralY2X = centralC.times(centralSx.transpose());
        distY2X = PCACommon.toDistributedRowMatrix(centralY2X, getTempPath(), getTempPath(), "CSxt" + round);
        // Xm = Ym * Y2X
        PCACommon.denseVectorTimesMatrix(ym, centralY2X, xm);
        // We skip computing X as we generate it on demand using Y and Y2X

        // Compute X'X and Y'X
        CompositeJob compositeJob = new CompositeJob();
        compositeJob.computeYtXandXtX(distY, distY2X, ym, xm, getTempPath(), conf, "" + round);
        Matrix centralXtX = compositeJob.xtx;
        Matrix centralYtX = compositeJob.ytx;
        if (sampleRate < 1) { // rescale
            centralXtX.assign(Functions.div(sampleRate));
            centralYtX.assign(Functions.div(sampleRate));
        }

        // XtX = X'*X + ss * Sx
        final double finalss = ss;
        centralXtX.assign(centralSx, new DoubleDoubleFunction() {
            @Override
            public double apply(double arg1, double arg2) {
                return arg1 + finalss * arg2;
            }
        });
        // C = (Ye'*X) / SumXtX;
        Matrix invXtX_central = inv(centralXtX);
        centralC = centralYtX.times(invXtX_central);
        distC = PCACommon.toDistributedRowMatrix(centralC, getTempPath(), getTempPath(), "C" + round);
        centralCtC = centralC.transpose().times(centralC);

        // Compute new value for ss
        // ss = ( sum(sum(Ye.^2)) + trace(XtX*CtC) - 2*sum(XiCtYit) ) / (N*D);
        double ss2 = PCACommon.trace(centralXtX.times(centralCtC));
        VarianceJob varianceJob = new VarianceJob();
        double xctyt = varianceJob.computeVariance(distY, ym, distY2X, xm, distC, getTempPath(), conf,
                "" + round);
        if (sampleRate < 1) { // rescale
            xctyt = xctyt / sampleRate;
        }
        ss = (norm2 + ss2 - 2 * xctyt) / (nRows * nCols);
        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss + " (" + norm2 + " + " + ss2 + " -2* " + xctyt);
        double traceSx = PCACommon.trace(centralSx);
        double traceXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceXtX + " " + traceC + " " + traceCtC);

        double objective = ss;
        relChangeInObjective = Math.abs(1 - objective / prevObjective);
        prevObjective = objective;
        // FIX: the logger takes parameterized messages ({} placeholders);
        // printf specifiers ("%.6f") are not substituted, so the original
        // call logged the bare format string without the values.
        log.info("Objective: {}    relative change: {}", objective, relChangeInObjective);
        if (!CALCULATE_ERR_ATTHEEND) {
            log.info("Computing the error at round " + round + " ...");
            ReconstructionErrJob errJob = new ReconstructionErrJob();
            error = errJob.reconstructionErr(distY, distY2X, distC, centralC, ym, xm, errSampleRate, conf,
                    getTempPath(), "" + round);
            log.info("... end of computing the error at round " + round);
            prevError = error;
        }
    }

    if (CALCULATE_ERR_ATTHEEND) {
        log.info("Computing the error at round " + round + " ...");
        ReconstructionErrJob errJob = new ReconstructionErrJob();
        error = errJob.reconstructionErr(distY, distY2X, distC, centralC, ym, xm, errSampleRate, conf,
                getTempPath(), "" + round);
        log.info("... end of computing the error at round " + round);
    }

    initVal.C = centralC;
    initVal.ss = ss;
    writeMatrix(initVal.C, output, getTempPath(), "PCs");
    return error;

}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fits into memory. This
 * can also be used on data sampled from a distributed matrix.
 * 
 * Note: this implementation ignores NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss; overwritten with the final estimates
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // ignore NaN elements by replacing them with 0
    // FIX: use the static Double.isNaN instead of boxing every element into a
    // new Double just to test it
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (Double.isNaN(centralY.getQuick(r, c))) {
                centralY.setQuick(r, c, 0);
            }

    // centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    // also normalize the matrix by dividing each element by its column's range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    // norm2 = sum(sum(Ye.^2)). Y is only read inside the EM loop, so this is
    // loop-invariant; compute it once (the original recomputed it every round).
    final double norm2 = centralY.clone().assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum();

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( ss * eye(d) + CtC );   (comment fixed to match the code)
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye * C * Sx';
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum(Ye.^2)) + trace(XtX*CtC) - 2*xcty ) / (N*D);
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        // xcty = sum_i ( X(i,:) * C' * Y(i,:)' )
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        // FIX: parameterized logger — "%.6f" printf specifiers are not
        // substituted; use {} placeholders so the values actually appear.
        log.info("Objective: {}    relative change: {}", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    // NOTE(review): centralX is null if MAX_ROUNDS < 1; callers pass >= 1.
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially (Jacob's version) given the small input Y which fits
 * into memory. This can also be used on data sampled from a distributed matrix.
 * 
 * Note: this implementation ignores NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 */
double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal,
        final int MAX_ROUNDS) {
    Matrix centralC = initVal.C;// the current implementation doesn't use initial ss of
    // initVal
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // Y = Y - mean(Ye)
    // Also normalize the matrix
    // FIX: use the static Double.isNaN instead of boxing every element into a
    // new Double just to test it
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (Double.isNaN(centralY.getQuick(r, c))) {
                centralY.setQuick(r, c, 0);
            }
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    // -------------------------- initialization
    // CtC = C'*C;
    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    // X = Ye * C * inv(CtC);
    Matrix centralX = centralY.times(centralC).times(inv(centralCtC));
    log.info("tracex= " + PCACommon.trace(centralX));
    // recon = X * C';
    Matrix recon = centralX.times(centralC.transpose());
    log.info("tracerec= " + PCACommon.trace(recon));
    // ss = sum(sum((recon-Ye).^2)) / (N*D)
    // NOTE(review): the MATLAB original divides by N*D-missing; this version
    // zeroes NaNs above and divides by plain N*D.
    double ss = recon.minus(centralY).assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum() / (nRows * nCols);
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // ------------------ E-step, (co)variances
        // Sx = inv( eye(d) + CtC/ss );
        Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss));
        centralSx = inv(centralSx);
        // ------------------ E-step expected value
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(centralSx.divide(ss));
        // ------------------ M-step
        // SumXtX = X'*X;
        Matrix centralSumXtX = centralX.transpose().times(centralX);
        // C = (Ye'*X) / (SumXtX + N*Sx );
        Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows)));
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) +
        // missing*ss_old ) /(N*D);
        recon = centralX.times(centralC.transpose());
        double error = recon.minus(centralY).assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum();
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(centralSx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralSumXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) )
        // +PCACommon.trace(SumXtX) -missing*log(ss_old);
        double objective = nRows * nCols + nRows
                * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant()))
                + PCACommon.trace(centralSumXtX);
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        System.out.printf("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Y is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.sparkpca.SparkPCA.java

/**
 * Compute principal component analysis where the input is an RDD<org.apache.spark.mllib.linalg.Vector> of vectors such that each vector represents a row in the matrix
 * @param sc
 *          Spark context that contains the configuration parameters and represents connection to the cluster
 *          (used to create RDDs, accumulators and broadcast variables on that cluster)
 * @param vectors
 *          An RDD of vectors representing the rows of the input matrix
 * @param nRows
 *          Number of rows in input Matrix
 * @param nCols
 *          Number of columns in input Matrix
 * @param nPCs
 *          Number of desired principal components
 * @param errRate
 *          The sampling rate that is used for computing the reconstruction error
 * @param maxIterations
 *          Maximum number of iterations before terminating
 * @return Matrix of size nCols X nPCs having the desired principal components
 */
public static org.apache.spark.mllib.linalg.Matrix computePrincipalComponents(JavaSparkContext sc,
        JavaRDD<org.apache.spark.mllib.linalg.Vector> vectors, String outputPath, final int nRows,
        final int nCols, final int nPCs, final double errRate, final int maxIterations,
        final int computeProjectedMatrix) {

    System.out.println("Rows: " + nRows + ", Columns " + nCols);
    double ss;
    Matrix centralC;

    // The two PPCA variables that improve over each iteration: Principal component matrix (C) and random variance (ss).
    // Initialized randomly for the first iteration
    ss = PCAUtils.randSS();
    centralC = PCAUtils.randomMatrix(nCols, nPCs);

    // 1. Mean Job : This job calculates the mean and span of the columns of the input RDD<org.apache.spark.mllib.linalg.Vector>
    final Accumulator<double[]> matrixAccumY = sc.accumulator(new double[nCols], new VectorAccumulatorParam());
    final double[] internalSumY = new double[nCols];
    vectors.foreachPartition(new VoidFunction<Iterator<org.apache.spark.mllib.linalg.Vector>>() {

        public void call(Iterator<org.apache.spark.mllib.linalg.Vector> arg0) throws Exception {
            org.apache.spark.mllib.linalg.Vector yi;
            int[] indices = null;
            int i;
            while (arg0.hasNext()) {
                yi = arg0.next();
                indices = ((SparseVector) yi).indices();
                for (i = 0; i < indices.length; i++) {
                    internalSumY[indices[i]] += yi.apply(indices[i]);
                }
            }
            matrixAccumY.add(internalSumY);
        }

    });//End Mean Job

    //Get the sum of column Vector from the accumulator and divide each element by the number of rows to get the mean
    final Vector meanVector = new DenseVector(matrixAccumY.value()).divide(nRows);
    final Broadcast<Vector> br_ym_mahout = sc.broadcast(meanVector);

    //2. Frobenious Norm Job : Obtain Frobenius norm of the input RDD<org.apache.spark.mllib.linalg.Vector>
    /**
     * To compute the norm2 of a sparse matrix, iterate over sparse items and sum
     * square of the difference. After processing each row, add the sum of the
     * meanSquare of the zero-elements that were ignored in the sparse iteration.
    */
    double meanSquareSumTmp = 0;
    for (int i = 0; i < br_ym_mahout.value().size(); i++) {
        double element = br_ym_mahout.value().getQuick(i);
        meanSquareSumTmp += element * element;
    }
    final double meanSquareSum = meanSquareSumTmp;
    final Accumulator<Double> doubleAccumNorm2 = sc.accumulator(0.0);
    vectors.foreach(new VoidFunction<org.apache.spark.mllib.linalg.Vector>() {

        public void call(org.apache.spark.mllib.linalg.Vector yi) throws Exception {
            double norm2 = 0;
            double meanSquareSumOfZeroElements = meanSquareSum;
            int[] indices = ((SparseVector) yi).indices();
            int i;
            int index;
            double v;
            //iterate over non
            for (i = 0; i < indices.length; i++) {
                index = indices[i];
                v = yi.apply(index);
                double mean = br_ym_mahout.value().getQuick(index);
                double diff = v - mean;
                diff *= diff;

                // cancel the effect of the non-zero element in meanSquareSum
                meanSquareSumOfZeroElements -= mean * mean;
                norm2 += diff;
            }
            // For all all zero items, the following has the sum of mean square
            norm2 += meanSquareSumOfZeroElements;
            doubleAccumNorm2.add(norm2);
        }

    });//end Frobenious Norm Job

    double norm2 = doubleAccumNorm2.value();
    log.info("NOOOOORM2=" + norm2);

    // initial CtC  
    Matrix centralCtC = centralC.transpose().times(centralC);

    Matrix centralY2X = null;
    // -------------------------- EM Iterations
    // while count
    int round = 0;
    double prevObjective = Double.MAX_VALUE;
    double error = 0;
    double relChangeInObjective = Double.MAX_VALUE;
    double prevError = Double.MAX_VALUE;
    final float threshold = 0.00001f;
    final int firstRound = round;

    for (; (round < maxIterations && relChangeInObjective > threshold && prevError > 0.02); round++) {

        // Sx = inv( ss * eye(d) + CtC );
        Matrix centralSx = centralCtC.clone();

        // Sx = inv( eye(d) + CtC/ss );
        centralSx.viewDiagonal().assign(Functions.plus(ss));
        centralSx = PCAUtils.inv(centralSx);

        // X = Y * C * Sx' => Y2X = C * Sx'
        centralY2X = centralC.times(centralSx.transpose());

        //Broadcast Y2X because it will be used in several jobs and several iterations.
        final Broadcast<Matrix> br_centralY2X = sc.broadcast(centralY2X);

        // Xm = Ym * Y2X
        Vector xm_mahout = new DenseVector(nPCs);
        xm_mahout = PCAUtils.denseVectorTimesMatrix(br_ym_mahout.value(), centralY2X, xm_mahout);

        //Broadcast Xm because it will be used in several iterations.
        final Broadcast<Vector> br_xm_mahout = sc.broadcast(xm_mahout);
        // We skip computing X as we generate it on demand using Y and Y2X

        // 3. X'X and Y'X Job:  The job computes the two matrices X'X and Y'X
        /**
         * Xc = Yc * MEM   (MEM is the in-memory broadcasted matrix Y2X)
         * 
         * XtX = Xc' * Xc
         * 
         * YtX = Yc' * Xc
         * 
         * It also considers that Y is sparse and receives the mean vectors Ym and Xm
         * separately.
         * 
         * Yc = Y - Ym
         * 
         * Xc = X - Xm
         * 
         * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
         * 
         * XtX = (X - Xm)' * (X - Xm)
         * 
         * YtX = (Y - Ym)' * (X - Xm)
         * 
        */
        final Accumulator<double[][]> matrixAccumXtx = sc.accumulator(new double[nPCs][nPCs],
                new MatrixAccumulatorParam());
        final Accumulator<double[][]> matrixAccumYtx = sc.accumulator(new double[nCols][nPCs],
                new MatrixAccumulatorParam());
        final Accumulator<double[]> matrixAccumX = sc.accumulator(new double[nPCs],
                new VectorAccumulatorParam());

        /*
         * Initialize the output matrices and vectors once in order to avoid generating massive intermediate data in the workers
         */
        final double[][] resArrayYtX = new double[nCols][nPCs];
        final double[][] resArrayXtX = new double[nPCs][nPCs];
        final double[] resArrayX = new double[nPCs];

        /*
         * Used to sum the vectors in one partition.
        */
        final double[][] internalSumYtX = new double[nCols][nPCs];
        final double[][] internalSumXtX = new double[nPCs][nPCs];
        final double[] internalSumX = new double[nPCs];

        vectors.foreachPartition(new VoidFunction<Iterator<org.apache.spark.mllib.linalg.Vector>>() {

            public void call(Iterator<org.apache.spark.mllib.linalg.Vector> arg0) throws Exception {
                org.apache.spark.mllib.linalg.Vector yi;
                while (arg0.hasNext()) {
                    yi = arg0.next();

                    /*
                      * Perform in-memory matrix multiplication xi = yi' * Y2X
                     */
                    PCAUtils.sparseVectorTimesMatrix(yi, br_centralY2X.value(), resArrayX);

                    //get only the sparse indices
                    int[] indices = ((SparseVector) yi).indices();

                    PCAUtils.outerProductWithIndices(yi, br_ym_mahout.value(), resArrayX, br_xm_mahout.value(),
                            resArrayYtX, indices);
                    PCAUtils.outerProductArrayInput(resArrayX, br_xm_mahout.value(), resArrayX,
                            br_xm_mahout.value(), resArrayXtX);
                    int i, j, rowIndexYtX;

                    //add the sparse indices only
                    for (i = 0; i < indices.length; i++) {
                        rowIndexYtX = indices[i];
                        for (j = 0; j < nPCs; j++) {
                            internalSumYtX[rowIndexYtX][j] += resArrayYtX[rowIndexYtX][j];
                            resArrayYtX[rowIndexYtX][j] = 0; //reset it
                        }

                    }
                    for (i = 0; i < nPCs; i++) {
                        internalSumX[i] += resArrayX[i];
                        for (j = 0; j < nPCs; j++) {
                            internalSumXtX[i][j] += resArrayXtX[i][j];
                            resArrayXtX[i][j] = 0; //reset it
                        }

                    }
                }
                matrixAccumX.add(internalSumX);
                matrixAccumXtx.add(internalSumXtX);
                matrixAccumYtx.add(internalSumYtX);
            }

        });//end  X'X and Y'X Job

        /*
         * Get the values of the accumulators.
        */
        Matrix centralYtX = new DenseMatrix(matrixAccumYtx.value());
        Matrix centralXtX = new DenseMatrix(matrixAccumXtx.value());
        Vector centralSumX = new DenseVector(matrixAccumX.value());

        /*
          * Mi = (Yi-Ym)' x (Xi-Xm) = Yi' x (Xi-Xm) - Ym' x (Xi-Xm)
          * 
          * M = Sum(Mi) = Sum(Yi' x (Xi-Xm)) - Ym' x (Sum(Xi)-N*Xm)
          * 
          * The first part is done in the previous job and the second in the following method
         */
        centralYtX = PCAUtils.updateXtXAndYtx(centralYtX, centralSumX, br_ym_mahout.value(), xm_mahout, nRows);
        centralXtX = PCAUtils.updateXtXAndYtx(centralXtX, centralSumX, xm_mahout, xm_mahout, nRows);

        // XtX = X'*X + ss * Sx
        final double finalss = ss;
        centralXtX.assign(centralSx, new DoubleDoubleFunction() {
            @Override
            public double apply(double arg1, double arg2) {
                return arg1 + finalss * arg2;
            }
        });

        // C = (Ye'*X) / SumXtX;
        Matrix invXtX_central = PCAUtils.inv(centralXtX);
        centralC = centralYtX.times(invXtX_central);
        centralCtC = centralC.transpose().times(centralC);

        // Compute new value for ss
        // ss = ( sum(sum(Ye.^2)) + trace(XtX*CtC) - 2sum(XiCtYit))/(N*D);

        //4. Variance Job: Computes part of variance that requires a distributed job
        /**
         * xcty = Sum (xi * C' * yi')
         * 
         * We also regenerate xi on demand by the following formula:
         * 
         * xi = yi * y2x
         * 
         * To make it efficient for sparse matrices that are not mean-centered, we receive the mean
         * separately:
         * 
         * xi = (yi - ym) * y2x = yi * y2x - xm, where xm = ym*y2x
         * 
         * xi * C' * (yi-ym)' = xi * ((yi-ym)*C)' = xi * (yi*C - ym*C)'
         * 
         */

        double ss2 = PCAUtils.trace(centralXtX.times(centralCtC));
        final double[] resArrayYmC = new double[centralC.numCols()];
        PCAUtils.denseVectorTimesMatrix(meanVector, centralC, resArrayYmC);
        final Broadcast<Matrix> br_centralC = sc.broadcast(centralC);
        final double[] resArrayYiC = new double[centralC.numCols()];
        final Accumulator<Double> doubleAccumXctyt = sc.accumulator(0.0);
        vectors.foreach(new VoidFunction<org.apache.spark.mllib.linalg.Vector>() {
            public void call(org.apache.spark.mllib.linalg.Vector yi) throws Exception {
                // Regenerate xi = yi * Y2X on demand (mean shift is handled via resArrayYmC below).
                PCAUtils.sparseVectorTimesMatrix(yi, br_centralY2X.value(), resArrayX);
                // yi * C
                PCAUtils.sparseVectorTimesMatrix(yi, br_centralC.value(), resArrayYiC);
                // (yi - ym) * C, computed as yi*C - ym*C (see the job comment above).
                PCAUtils.subtract(resArrayYiC, resArrayYmC);
                // Fold xi * ((yi - ym) * C)' into the driver-side accumulator for xcty.
                doubleAccumXctyt.add(PCAUtils.dot(resArrayX, resArrayYiC));
            }
        }); //end Variance Job

        double xctyt = doubleAccumXctyt.value();
        ss = (norm2 + ss2 - 2 * xctyt) / (nRows * nCols);
        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss + " (" + norm2 + " + " + ss2 + " -2* " + xctyt);

        double objective = ss;
        relChangeInObjective = Math.abs(1 - objective / prevObjective);
        prevObjective = objective;
        // BUG FIX: this logger's varargs info(String, Object...) signature is the
        // parameterized (SLF4J-style) API, which substitutes "{}" placeholders only;
        // printf specifiers like "%.6f" are never expanded, so the objective values
        // were silently dropped from the log line. Format explicitly instead.
        log.info(String.format("Objective:  %.6f    relative change: %.6f \n", objective,
                relChangeInObjective));

        if (!CALCULATE_ERR_ATTHEEND) {
            log.info("Computing the error at round " + round + " ...");
            System.out.println("Computing the error at round " + round + " ...");

            double[] resArrayZmTmp = new double[centralC.numRows()];
            PCAUtils.vectorTimesMatrixTranspose(xm_mahout, centralC, resArrayZmTmp);
            final double[] resArrayZm = PCAUtils.subtractVectorArray(resArrayZmTmp, meanVector);

            //5. Reconstruction Error Job: The job computes the reconstruction error of the input matrix after updating the principal 
            //components matrix
            /**
             * Xc = Yc * Y2X
             * 
             * ReconY = Xc * C'
             * 
             * Err = ReconY - Yc
             * 
             * Norm2(Err) = abs(Err).zSum().max()
             * 
             * To take the sparse matrixes into account we receive the mean separately:
             * 
             * X = (Y - Ym) * Y2X = X - Xm, where X=Y*Y2X and Xm=Ym*Y2X
             * 
             * ReconY = (X - Xm) * C' = X*C' - Xm*C'
             * 
             * Err = X*C' - Xm*C' - (Y - Ym) = X*C' - Y - Zm, where where Zm=Xm*C'-Ym
             * 
             */

            final Accumulator<double[]> vectorAccumErr = sc.accumulator(new double[nCols],
                    new VectorAccumulatorAbsParam());
            final Accumulator<double[]> vectorAccumNormCentralized = sc.accumulator(new double[nCols],
                    new VectorAccumulatorAbsParam());
            final double[] xiCtArray = new double[centralC.numRows()];
            final double[] normalizedArray = new double[nCols];

            vectors.foreach(new VoidFunction<org.apache.spark.mllib.linalg.Vector>() {
                public void call(org.apache.spark.mllib.linalg.Vector yi) throws Exception {
                    // PCAUtils.pass(errRate) decides whether this row is excluded from the
                    // error estimate (NOTE(review): presumably random sampling at rate
                    // errRate -- confirm against PCAUtils).
                    if (!PCAUtils.pass(errRate)) {
                        // X = yi * Y2X (the mean shift is folded into resArrayZm).
                        PCAUtils.sparseVectorTimesMatrix(yi, br_centralY2X.value(), resArrayX);
                        // X * C'
                        PCAUtils.vectorTimesMatrixTranspose(resArrayX, br_centralC.value(), xiCtArray);
                        // Error row: X*C' - yi - Zm, where Zm = Xm*C' - Ym (see job comment above).
                        PCAUtils.denseVectorSubtractSparseSubtractDense(xiCtArray, yi, resArrayZm);
                        vectorAccumErr.add(xiCtArray);
                        // Centralized input row yi - Ym, for the normalization denominator.
                        PCAUtils.denseVectorMinusVector(yi, br_ym_mahout.value(), normalizedArray);
                        vectorAccumNormCentralized.add(normalizedArray);
                    }
                }
            }); // end Reconstruction Error Job

            /*
             * The norm of the reconstruction error matrix
             */
            double reconstructionError = PCAUtils.getMax(vectorAccumErr.value());
            log.info("************************ReconsructionError=" + reconstructionError);

            /*
             * The norm of the input matrix after centralization
            */
            double centralizedYNorm = PCAUtils.getMax(vectorAccumNormCentralized.value());
            log.info("************************CentralizedNOrm=" + centralizedYNorm);

            error = reconstructionError / centralizedYNorm;
            log.info("... end of computing the error at round " + round + " And error=" + error);

            prevError = error;
        }
    }
    if (computeProjectedMatrix == 1) {
        //7. Compute Projected Matrix Job: The job multiplies the input matrix Y with the principal components C to get the Projected vectors 
        final Broadcast<Matrix> br_centralC = sc.broadcast(centralC);
        JavaRDD<org.apache.spark.mllib.linalg.Vector> projectedVectors = vectors.map(
                new Function<org.apache.spark.mllib.linalg.Vector, org.apache.spark.mllib.linalg.Vector>() {

                    public org.apache.spark.mllib.linalg.Vector call(org.apache.spark.mllib.linalg.Vector yi)
                            throws Exception {
                        // Projected row: zi = yi * C, where C holds the principal components.
                        return PCAUtils.sparseVectorTimesMatrix(yi, br_centralC.value());
                    }
                });//End Compute Projected Matrix
        String path = outputPath + File.separator + "ProjectedMatrix";
        projectedVectors.saveAsTextFile(path);
    }
    return PCAUtils.convertMahoutToSparkMatrix(centralC);

}