List of usage examples for weka.core.matrix SingularValueDecomposition getV
public Matrix getV()
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/** * The method to transform the labels into another set of latent labels, * typically a compression method is used, e.g., Boolean matrix decomposition * in the case of MLC-BMaD, or matrix multiplication based on SVD for PLST. * * @param D the instances to transform into new instances with transformed labels. The * Instances consist of features and original labels. * @return The resulting instances. Instances consist of features and transformed labels. *//* w w w .j ava 2s . c o m*/ @Override public Instances transformLabels(Instances D) throws Exception { Instances features = this.extractPart(D, false); Instances labels = this.extractPart(D, true); Matrix labelMatrix = MatrixUtils.instancesToMatrix(labels); // first, lets do the preprocessing as in the original implementation double[] averages = new double[labels.numAttributes()]; for (int i = 0; i < labels.numAttributes(); i++) { double[] column = labels.attributeToDoubleArray(i); double sum = 0.0; for (int j = 0; j < column.length; j++) { if (column[j] == 1.0) { sum += 1.0; } else { sum += -1; // The algorithm needs 1/-1 coding, so let's // change the matrix here labelMatrix.set(j, i, -1.0); } } averages[i] = sum / column.length; } double[][] shiftMatrix = new double[1][labels.numAttributes()]; shiftMatrix[0] = averages; // remember shift for prediction this.m_Shift = new Matrix(shiftMatrix); double[][] shiftTrainMatrix = new double[labels.numInstances()][labels.numAttributes()]; for (int i = 0; i < labels.numInstances(); i++) { shiftTrainMatrix[i] = averages; } Matrix trainShift = new Matrix(shiftTrainMatrix); SingularValueDecomposition svd = new SingularValueDecomposition(labelMatrix.minus(trainShift)); // The paper uses U here, but the implementation by the authors uses V, so // we used V here too. m_v = svd.getV(); //remove columns so only size are left double[][] newArr = new double[m_v.getRowDimension()][this.getSize()]; for (int i = 0; i < newArr.length; i++) { for (int j = 0; j < newArr[i].length; j++) { newArr[i][j] = m_v.getArray()[i][j]; } } m_v = new Matrix(newArr); // now the multiplication (last step of the algorithm) Matrix compressed = MatrixUtils.instancesToMatrix(labels).times(this.m_v); // and transform it to Instances ArrayList<Attribute> attinfos = new ArrayList<Attribute>(); for (int i = 0; i < compressed.getColumnDimension(); i++) { Attribute att = new Attribute("att" + i); attinfos.add(att); } // create pattern instances (also used in prediction) note: this is a regression // problem now, labels are not binary this.m_PatternInstances = new Instances("compressedlabels", attinfos, compressed.getRowDimension()); // fill result Instances Instances result = Instances.mergeInstances(MatrixUtils.matrixToInstances(compressed, m_PatternInstances), features); result.setClassIndex(this.getSize()); return result; }
From source file:mulan.transformations.ColumnSubsetSelection.java
License:Open Source License
public MultiLabelInstances transform(MultiLabelInstances data, int kappa, long seed) { try {//ww w . j a v a 2 s . c o m if (kappa >= data.getNumLabels()) { throw new MulanRuntimeException( "Dimensionality reduction parameter should not exceed or be equal to the total count of labels!"); } // integer indices of physical label assignments int[] labelIndices = data.getLabelIndices(); int[] indices = new int[labelIndices.length]; System.arraycopy(labelIndices, 0, indices, 0, labelIndices.length); // load label indicator matrix in a Matrix object double[][] datmatrix = new double[data.getDataSet().numInstances()][labelIndices.length]; Matrix mat = new Matrix(datmatrix); for (int i = 0; i < data.getDataSet().numInstances(); i++) { Instance instance = data.getDataSet().instance(i); for (int j = 0; j < labelIndices.length; j++) { mat.set(i, j, Double.parseDouble(instance.toString(labelIndices[j]))); //DEBUG: System.out.print("" + Double.parseDouble(instance.toString(labelIndices[j])) + ","); } } // make private copy of the label matrix this.Y = mat; // compute eigenvalue analysis of label indicator matrix SingularValueDecomposition svd = new SingularValueDecomposition(mat); //DEBUG: System.out.println("rows = " + svd.getV().getRowDimension() + ", cols = " + svd.getV().getColumnDimension()); assert (svd.getV().getRowDimension() == svd.getV().getColumnDimension()); Matrix rVec = svd.getV(); Matrix Vk = new Matrix(new double[svd.getV().getRowDimension()][kappa]); // snippet (2) for (int i = 0; i < kappa; i++) { for (int j = 0; j < svd.getV().getColumnDimension(); j++) { Vk.set(j, i, rVec.get(i, j)); } } // compute column selection probabilitites double[] selectionProbabilities = new double[Vk.getRowDimension()]; double[] selectionProbabilitiesCDF = new double[Vk.getRowDimension()]; for (int i = 0; i < Vk.getRowDimension(); i++) { selectionProbabilities[i] = 0.0; for (int j = 0; j < kappa; j++) { selectionProbabilities[i] += Math.pow(Vk.get(i, j), 2); } selectionProbabilities[i] = Math.sqrt(selectionProbabilities[i]); } // normalize probabilities double psum = 0.0; for (int i = 0; i < Vk.getRowDimension(); i++) { psum += selectionProbabilities[i]; //System.out.println("psum = " + psum); } //System.out.println("psum = " + psum); //assert (psum != 0 && psum == 1.0); // must be non-zero and unitary for (int i = 0; i < Vk.getRowDimension(); i++) { selectionProbabilities[i] /= psum; } psum = 0.0; for (int i = 0; i < Vk.getRowDimension(); i++) { psum += selectionProbabilities[i]; selectionProbabilitiesCDF[i] = psum; } // add selected columns on a linked list sampledIndiceSet = new java.util.HashSet(); // run column-sampling loop int sampling_count = 0; Random generator = new Random(seed); while (sampledIndiceSet.size() < kappa) // ...loop until knapsack gets filled... { // pick a random number //DEBUG: //double roulette = generator.nextDouble() * 0.5; double roulette = generator.nextDouble(); // seek closest match according to sampling probabilities int closest_match = -1; // iterate label cols for (int i = 0; i < Vk.getRowDimension(); i++) { if (roulette < selectionProbabilitiesCDF[i]) // ...spot a possible match... { // ...if so, select and quit scope... closest_match = i; // BEWARE! "i" is an index over the label enumeration, not an ordering index! break; } } // if we stepped on the flag, something serious is going on! assert (closest_match != -1); // see if column was selected; if not, add it if (!sampledIndiceSet.contains((Object) closest_match)) { sampledIndiceSet.add((Object) closest_match); //System.out.println("DEBUG(CSSP): Added column " + closest_match + " to the sampled column set!"); } sampling_count += 1; } System.out.println("Sampling loop completed in " + sampling_count + " runs."); // compute indices-to-remove array indicesToRemove = new int[labelIndices.length - sampledIndiceSet.size()]; // compute all **PHYSICAL** (not VIRTUAL) indices of label columns for CSSP to remove int idx = 0; for (int i = 0; i < labelIndices.length; i++) { if (!sampledIndiceSet.contains((Object) i)) { indicesToRemove[idx] = indices[i]; idx += 1; } } // apply CSSP: select columns to remove int[] selectedIndicesObj = indicesToRemove.clone(); selectedIndicesInt = new int[selectedIndicesObj.length]; for (int i = 0; i < selectedIndicesObj.length; i++) { selectedIndicesInt[i] = (int) selectedIndicesObj[i]; } // compute Moore-Penrose pseudo-inverse matrix of the column-reduced label indicator matrix double[][] datmatrix2 = new double[data.getDataSet().numInstances()][labelIndices.length - selectedIndicesInt.length]; Matrix matC = new Matrix(datmatrix2); //DEBUG: //System.out.println("Selecting only " + matC.getColumnDimension() + " columns; removing " + selectedIndicesInt.length + " columns out of an original total of " + data.getLabelIndices().length + " labels!"); // compute indices to keep java.util.LinkedList<Integer> indicesToKeep = new java.util.LinkedList(); for (int i = 0; i < labelIndices.length; i++) { boolean keep = true; // see if this col has to be removed for (int k = 0; k < selectedIndicesInt.length; k++) { if (selectedIndicesInt[k] == labelIndices[i]) { keep = false; break; } } // add if we actually should keep this... if (keep) { indicesToKeep.add(labelIndices[i]); } } assert (indicesToKeep.size() == matC.getColumnDimension()); for (int i = 0; i < matC.getRowDimension(); i++) { // get data instance Instance instance = data.getDataSet().instance(i); // replicate data from ALL columns that WOULD not be removed by CSSP for (int j = 0; j < matC.getColumnDimension(); j++) { // get label indice int corrIdx = (int) indicesToKeep.get(j); // update matC matC.set(i, j, Double.parseDouble(instance.toString(corrIdx))); } } //DEBUG: System.out.println("matC rows = " + matC.getRowDimension() + ", cols = " + matC.getColumnDimension() + "\n data original label cols # = " + data.getLabelIndices().length); // make private copy of projection matrices // Moore-Penrose pseudo-inverse of the label matrix matC // see http://robotics.caltech.edu/~jwb/courses/ME115/handouts/pseudo.pdf for an SVD-based workaround for MP-inverse // Moore-Penrose pseudoinverse computation based on Singular Value Decomposition (SVD) /* SingularValueDecomposition decomp = Vk.svd(); Matrix S = decomp.getS(); Matrix Scross = new Matrix(selectedIndicesInt.length,selectedIndicesInt.length); for(int i = 0; i < selectedIndicesInt.length; i++) { for(int j = 0; j < selectedIndicesInt.length; j++) { if(i == j) { if(S.get(i, j) == 0) { Scross.set(i, j, 0.0); } else { Scross.set(i, j, 1 / S.get(i, j)); } } else { Scross.set(i, j, 0.0); } } } this.Yc = decomp.getV().times(Scross).times(decomp.getU().transpose()); */ // DEBUG: traditional way of computing the Moore-Penrose pseudoinverse if (matC.getRowDimension() >= matC.getColumnDimension()) { this.Yc = ((matC.transpose().times(matC)).inverse()).times(matC.transpose()); } else { this.Yc = matC.transpose().times((matC.times(matC.transpose()).inverse())); } //System.out.println("Yc rows: " + Yc.getRowDimension() + "\nYc cols: " + Yc.getColumnDimension() + "\n Y rows: " + Y.getRowDimension() + "\nY cols: " + Y.getColumnDimension()); this.ProjectionMatrix = Yc.times(Y); // compute projection matrix // add sampled indices to Remove object remove = new Remove(); remove.setAttributeIndicesArray(selectedIndicesInt); remove.setInvertSelection(false); remove.setInputFormat(data.getDataSet()); // apply remove filter on the labels transformed = Filter.useFilter(data.getDataSet(), remove); this.sampledIndicesObj = indicesToKeep.toArray(); return data.reintegrateModifiedDataSet(transformed); } catch (Exception ex) { // do nothing //Logger.getLogger(BinaryRelevanceTransformation.class.getName()).log(Level.SEVERE, null, ex); return null; } }