Example usage for weka.core.matrix SingularValueDecomposition getV

List of usage examples for weka.core.matrix SingularValueDecomposition getV

Introduction

On this page you can find example usages of weka.core.matrix SingularValueDecomposition getV.

Prototype

public Matrix getV() 

Source Link

Document

Return the right singular vectors

Usage

From source file:meka.classifiers.multilabel.PLST.java

License:Open Source License

/**
 * Maps the labels of a data set onto a smaller set of latent labels.
 * Compression methods typically do this step, e.g., Boolean matrix
 * decomposition for MLC-BMaD, or an SVD-based matrix multiplication for PLST
 * (which is what happens here).
 *
 * <p>As side effects the per-label shift ({@code m_Shift}), the truncated V
 * matrix ({@code m_v}) and the header of the compressed label space
 * ({@code m_PatternInstances}) are stored on this object.
 *
 * @param D the instances to transform; they consist of features and the
 * original labels.
 * @return the transformed instances, consisting of the compressed labels merged
 * with the features.
 * @throws Exception if one of the called helpers fails.
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    Matrix signedLabels = MatrixUtils.instancesToMatrix(labels);

    final int labelCount = labels.numAttributes();
    final int instanceCount = labels.numInstances();

    // Preprocessing as in the original implementation: recode every label to
    // 1/-1 (the algorithm needs that coding) and record each label's mean.
    double[] means = new double[labelCount];
    for (int label = 0; label < labelCount; label++) {
        double[] values = labels.attributeToDoubleArray(label);
        double total = 0.0;
        for (int row = 0; row < values.length; row++) {
            if (values[row] != 1.0) {
                // everything that is not exactly 1.0 becomes -1 in the matrix
                signedLabels.set(row, label, -1.0);
                total -= 1.0;
            } else {
                total += 1.0;
            }
        }
        means[label] = total / values.length;
    }

    // remember shift for prediction (a single row holding the means)
    this.m_Shift = new Matrix(new double[][] { means });

    // the same row of means repeated once per training instance
    double[][] repeatedMeans = new double[instanceCount][];
    for (int row = 0; row < instanceCount; row++) {
        repeatedMeans[row] = means;
    }
    Matrix trainShift = new Matrix(repeatedMeans);

    SingularValueDecomposition svd = new SingularValueDecomposition(signedLabels.minus(trainShift));

    // The paper uses U here, but the implementation by the authors uses V, so
    // V is used here too.
    m_v = svd.getV();

    // keep only the first getSize() columns of V
    double[][] fullV = m_v.getArray();
    double[][] leadingColumns = new double[m_v.getRowDimension()][this.getSize()];
    for (int row = 0; row < leadingColumns.length; row++) {
        double[] target = leadingColumns[row];
        for (int col = 0; col < target.length; col++) {
            target[col] = fullV[row][col];
        }
    }
    m_v = new Matrix(leadingColumns);

    // last step of the algorithm: multiply the labels (converted afresh from
    // the instances, i.e. without the recoding/shift above) with the reduced V
    Matrix compressed = MatrixUtils.instancesToMatrix(labels).times(this.m_v);

    // one attribute per compressed label column
    final int compressedCount = compressed.getColumnDimension();
    ArrayList<Attribute> compressedAttributes = new ArrayList<Attribute>();
    for (int col = 0; col < compressedCount; col++) {
        compressedAttributes.add(new Attribute("att" + col));
    }

    // create pattern instances (also used in prediction) note: this is a
    // regression problem now, labels are not binary
    this.m_PatternInstances = new Instances("compressedlabels", compressedAttributes,
            compressed.getRowDimension());

    // compressed labels first, then the untouched features
    Instances compressedLabels = MatrixUtils.matrixToInstances(compressed, m_PatternInstances);
    Instances result = Instances.mergeInstances(compressedLabels, features);

    result.setClassIndex(this.getSize());
    return result;
}

From source file:mulan.transformations.ColumnSubsetSelection.java

License:Open Source License

/**
 * Reduces the label space of {@code data} by randomly sampling {@code kappa}
 * label columns (column subset selection, CSSP) and removing all other label
 * attributes from the data set.
 *
 * <p>The sampling probabilities are derived from the V matrix of a singular
 * value decomposition of the label indicator matrix. Besides returning the
 * reduced data set, the method stores its intermediate results in the fields
 * {@code Y}, {@code sampledIndiceSet}, {@code indicesToRemove},
 * {@code selectedIndicesInt}, {@code Yc}, {@code ProjectionMatrix},
 * {@code remove}, {@code transformed} and {@code sampledIndicesObj}.
 *
 * @param data the multi-label data set to reduce
 * @param kappa the number of label columns to sample; must be smaller than
 * {@code data.getNumLabels()}
 * @param seed the seed of the random generator that drives the sampling
 * @return the data set with only the sampled label columns, or {@code null}
 * if any exception occurred (including an invalid {@code kappa}); the
 * exception is logged
 */
public MultiLabelInstances transform(MultiLabelInstances data, int kappa, long seed) {
    try {
        if (kappa >= data.getNumLabels()) {
            throw new MulanRuntimeException(
                    "Dimensionality reduction parameter should not exceed or be equal to the total count of labels!");
        }

        // integer indices of physical label assignments
        int[] labelIndices = data.getLabelIndices();
        int numInstances = data.getDataSet().numInstances();

        // load label indicator matrix in a Matrix object
        Matrix mat = new Matrix(new double[numInstances][labelIndices.length]);
        for (int i = 0; i < numInstances; i++) {
            Instance instance = data.getDataSet().instance(i);
            for (int j = 0; j < labelIndices.length; j++) {
                mat.set(i, j, Double.parseDouble(instance.toString(labelIndices[j])));
            }
        }

        // keep a reference to the label matrix
        this.Y = mat;

        // singular value decomposition of the label indicator matrix
        SingularValueDecomposition svd = new SingularValueDecomposition(mat);
        Matrix rVec = svd.getV();

        assert (rVec.getRowDimension() == rVec.getColumnDimension());

        // snippet (2)
        // NOTE(review): this copies row i of V into column i of Vk, i.e. Vk is
        // the transpose of the first kappa ROWS of V. If the top-kappa right
        // singular vectors are intended, those would be the first kappa
        // COLUMNS of V - confirm against the reference algorithm. Left as is
        // because changing it would alter the sampling results.
        Matrix Vk = new Matrix(new double[rVec.getRowDimension()][kappa]);
        for (int i = 0; i < kappa; i++) {
            for (int j = 0; j < rVec.getColumnDimension(); j++) {
                Vk.set(j, i, rVec.get(i, j));
            }
        }

        // compute column selection probabilities: Euclidean norm of each row of Vk
        int numCandidates = Vk.getRowDimension();
        double[] selectionProbabilities = new double[numCandidates];
        double[] selectionProbabilitiesCDF = new double[numCandidates];

        for (int i = 0; i < numCandidates; i++) {
            double squaredNorm = 0.0;
            for (int j = 0; j < kappa; j++) {
                squaredNorm += Math.pow(Vk.get(i, j), 2);
            }
            selectionProbabilities[i] = Math.sqrt(squaredNorm);
        }

        // normalize probabilities so that they sum to one
        double psum = 0.0;
        for (int i = 0; i < numCandidates; i++) {
            psum += selectionProbabilities[i];
        }
        for (int i = 0; i < numCandidates; i++) {
            selectionProbabilities[i] /= psum;
        }

        // cumulative distribution used for roulette-wheel sampling
        psum = 0.0;
        for (int i = 0; i < numCandidates; i++) {
            psum += selectionProbabilities[i];
            selectionProbabilitiesCDF[i] = psum;
        }

        // Labels with zero probability can never be drawn. If fewer than kappa
        // labels are drawable, the sampling loop below could never finish, so
        // fail early instead of spinning forever.
        int selectableCount = 0;
        int lastSelectable = -1;
        for (int i = 0; i < numCandidates; i++) {
            if (selectionProbabilities[i] > 0.0) {
                selectableCount += 1;
                lastSelectable = i;
            }
        }
        if (selectableCount < kappa) {
            throw new IllegalStateException("Only " + selectableCount
                    + " label columns have a non-zero sampling probability; cannot sample " + kappa + ".");
        }

        // set of sampled label columns
        sampledIndiceSet = new java.util.HashSet();

        // run column-sampling loop until kappa distinct columns were drawn
        int samplingCount = 0;
        Random generator = new Random(seed);
        while (sampledIndiceSet.size() < kappa) {
            double roulette = generator.nextDouble();

            // first label whose cumulative probability exceeds the draw;
            // BEWARE: this is an index over the label enumeration, not an
            // attribute index!
            int closestMatch = -1;
            for (int i = 0; i < numCandidates; i++) {
                if (roulette < selectionProbabilitiesCDF[i]) {
                    closestMatch = i;
                    break;
                }
            }

            // Rounding can leave the last CDF entry slightly below 1.0, in
            // which case a draw close to 1.0 matches nothing. Map it to the
            // last drawable label instead of recording the -1 sentinel.
            if (closestMatch == -1) {
                closestMatch = lastSelectable;
            }
            assert (closestMatch != -1);

            // adding an already sampled column is a no-op for a set
            sampledIndiceSet.add((Object) closestMatch);

            samplingCount += 1;
        }

        System.out.println("Sampling loop completed in " + samplingCount + " runs.");

        // compute all **PHYSICAL** (not VIRTUAL) indices of label columns for CSSP to remove
        indicesToRemove = new int[labelIndices.length - sampledIndiceSet.size()];
        int idx = 0;
        for (int i = 0; i < labelIndices.length; i++) {
            if (!sampledIndiceSet.contains((Object) i)) {
                indicesToRemove[idx] = labelIndices[i];
                idx += 1;
            }
        }

        // apply CSSP: the columns to remove, as an independent copy
        selectedIndicesInt = indicesToRemove.clone();

        // column-reduced label indicator matrix
        Matrix matC = new Matrix(new double[numInstances][labelIndices.length - selectedIndicesInt.length]);

        // compute indices to keep: every label index that is not marked for removal
        java.util.List<Integer> indicesToKeep = new java.util.ArrayList<Integer>();
        for (int i = 0; i < labelIndices.length; i++) {
            boolean keep = true;
            for (int k = 0; k < selectedIndicesInt.length; k++) {
                if (selectedIndicesInt[k] == labelIndices[i]) {
                    keep = false;
                    break;
                }
            }
            if (keep) {
                indicesToKeep.add(labelIndices[i]);
            }
        }

        assert (indicesToKeep.size() == matC.getColumnDimension());

        // replicate data from ALL columns that are not removed by CSSP
        for (int i = 0; i < matC.getRowDimension(); i++) {
            Instance instance = data.getDataSet().instance(i);
            for (int j = 0; j < matC.getColumnDimension(); j++) {
                int corrIdx = indicesToKeep.get(j);
                matC.set(i, j, Double.parseDouble(instance.toString(corrIdx)));
            }
        }

        // Moore-Penrose pseudo-inverse of matC via the normal equations.
        // An SVD-based alternative is described in
        // http://robotics.caltech.edu/~jwb/courses/ME115/handouts/pseudo.pdf
        if (matC.getRowDimension() >= matC.getColumnDimension()) {
            this.Yc = ((matC.transpose().times(matC)).inverse()).times(matC.transpose());
        } else {
            this.Yc = matC.transpose().times((matC.times(matC.transpose()).inverse()));
        }

        this.ProjectionMatrix = Yc.times(Y); // compute projection matrix

        // remove the non-sampled label attributes from the data set
        remove = new Remove();
        remove.setAttributeIndicesArray(selectedIndicesInt);
        remove.setInvertSelection(false);
        remove.setInputFormat(data.getDataSet());

        transformed = Filter.useFilter(data.getDataSet(), remove);

        this.sampledIndicesObj = indicesToKeep.toArray();

        return data.reintegrateModifiedDataSet(transformed);

    } catch (Exception ex) {
        // Callers get null on failure (unchanged contract), but the cause is
        // no longer discarded silently.
        java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE,
                "Column subset selection failed", ex);
        return null;
    }
}