List of usage examples for weka.core Utils sort
public staticint[] sort(double[] array)
From source file:PrincipalComponents.java
License:Open Source License
private void buildAttributeConstructor(Instances data) throws Exception { m_eigenvalues = null;//w ww .ja va2 s. c om m_outputNumAtts = -1; m_attributeFilter = null; m_nominalToBinFilter = null; m_sumOfEigenValues = 0.0; m_trainInstances = new Instances(data); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainHeader = new Instances(m_trainInstances, 0); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); /* * if (m_normalize) { m_normalizeFilter = new Normalize(); * m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances * = Filter.useFilter(m_trainInstances, m_normalizeFilter); } */ m_nominalToBinFilter = new NominalToBinary(); m_nominalToBinFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter); // delete any attributes with only one distinct value or are all missing Vector<Integer> deleteCols = new Vector<Integer>(); for (int i = 0; i < m_trainInstances.numAttributes(); i++) { if (m_trainInstances.numDistinctValues(i) <= 1) { deleteCols.addElement(new Integer(i)); } } if (m_trainInstances.classIndex() >= 0) { // get rid of the class column m_hasClass = true; m_classIndex = m_trainInstances.classIndex(); deleteCols.addElement(new Integer(m_classIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int[] todelete = new int[deleteCols.size()]; for (int i = 0; i < deleteCols.size(); i++) { todelete[i] = (deleteCols.elementAt(i)).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } // can evaluator handle the processed data ? e.g., enough attributes? getCapabilities().testWithFail(m_trainInstances); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); fillCovariance(); SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation); m_eigenvectors = Matrices.getArray(evd.getEigenvectors()); m_eigenvalues = evd.getEigenvalues(); /* * for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < * m_numAttribs; j++) { System.err.println(v[i][j] + " "); } * System.err.println(d[i]); } */ // any eigenvalues less than 0 are not worth anything --- change to 0 for (int i = 0; i < m_eigenvalues.length; i++) { if (m_eigenvalues[i] < 0) { m_eigenvalues[i] = 0.0; } } m_sortedEigens = Utils.sort(m_eigenvalues); m_sumOfEigenValues = Utils.sum(m_eigenvalues); m_transformedFormat = setOutputFormat(); if (m_transBackToOriginal) { m_originalSpaceFormat = setOutputFormatOriginal(); // new ordered eigenvector matrix int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes() : m_transformedFormat.numAttributes() - 1; double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1]; // try converting back to the original space for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { for (int j = 0; j < m_numAttribs; j++) { orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]]; } } // transpose the matrix int nr = orderedVectors.length; int nc = orderedVectors[0].length; m_eTranspose = new double[nc][nr]; for (int i = 0; i < nc; i++) { for (int j = 0; j < nr; j++) { m_eTranspose[i][j] = orderedVectors[j][i]; } } } }
From source file:PrincipalComponents.java
License:Open Source License
/** * Set the format for the transformed data * * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set *//* w w w. j a v a2 s .c o m*/ private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) { coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); } int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing // magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) { coeff_inds[j] = j; } } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) { attName.append("+"); } attName.append( Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) { attName.append("..."); } attributes.add(new Attribute(attName.toString())); cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.add((Attribute) m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } m_outputNumAtts = outputFormat.numAttributes(); return outputFormat; }
From source file:REPTree.java
License:Open Source License
/** * Builds classifier./*from w w w . j a v a 2 s . co m*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier./*from w w w . j ava2s.co m*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:PCADetector.java
License:Apache License
public int[] Sort(double[] eigenValues, double[][] eigenVectors) { int numDims = eigenValues.length; int numRows = eigenVectors.length; int[] order = Utils.sort(eigenValues); double[] sortedEigenValues = new double[numDims]; double[][] sortedEigenVectors = new double[eigenVectors.length][numDims]; for (int i = 0; i < numDims; i++) { sortedEigenValues[i] = eigenValues[order[numDims - 1 - i]]; for (int j = 0; j < numRows; j++) { sortedEigenVectors[j][i] = eigenVectors[j][order[numDims - 1 - i]]; }//from w ww . jav a 2 s. c o m } for (int i = 0; i < numDims; i++) { eigenValues[i] = sortedEigenValues[i]; for (int j = 0; j < numRows; j++) { eigenVectors[j][i] = sortedEigenVectors[j][i]; } } return order; }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Sets up the priors for numeric class attributes from the training class * values that have been seen so far./* w w w. j av a 2s . com*/ */ protected void setNumericPriorsFromBuffer() { double numPrecision = 0.01; // Default value if (m_NumTrainClassVals > 1) { double[] temp = new double[m_NumTrainClassVals]; System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals); int[] index = Utils.sort(temp); double lastVal = temp[index[0]]; double deltaSum = 0; int distinct = 0; for (int i = 1; i < temp.length; i++) { double current = temp[index[i]]; if (current != lastVal) { deltaSum += current - lastVal; lastVal = current; distinct++; } } if (distinct > 0) { numPrecision = deltaSum / distinct; } } m_PriorErrorEstimator = new KernelEstimator(numPrecision); m_ErrorEstimator = new KernelEstimator(numPrecision); m_ClassPriors[0] = m_ClassPriorsSum = 0; for (int i = 0; i < m_NumTrainClassVals; i++) { m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i]; m_ClassPriorsSum += m_TrainClassWeights[i]; m_PriorErrorEstimator.addValue(m_TrainClassVals[i], m_TrainClassWeights[i]); } }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
private void buildAttributeConstructor(Instances data) throws Exception { m_eigenvalues = null;/* ww w. j a v a2s. c o m*/ m_outputNumAtts = -1; m_attributeFilter = null; m_nominalToBinFilter = null; m_sumOfEigenValues = 0.0; m_trainInstances = new Instances(data); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainHeader = new Instances(m_trainInstances, 0); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); /*if (m_normalize) { m_normalizeFilter = new Normalize(); m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter); } */ m_nominalToBinFilter = new NominalToBinary(); m_nominalToBinFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter); // delete any attributes with only one distinct value or are all missing Vector deleteCols = new Vector(); for (int i = 0; i < m_trainInstances.numAttributes(); i++) { if (m_trainInstances.numDistinctValues(i) <= 1) { deleteCols.addElement(new Integer(i)); } } if (m_trainInstances.classIndex() >= 0) { // get rid of the class column m_hasClass = true; m_classIndex = m_trainInstances.classIndex(); deleteCols.addElement(new Integer(m_classIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int[] todelete = new int[deleteCols.size()]; for (int i = 0; i < deleteCols.size(); i++) { todelete[i] = ((Integer) (deleteCols.elementAt(i))).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } // can evaluator handle the processed data ? e.g., enough attributes? getCapabilities().testWithFail(m_trainInstances); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); //fillCorrelation(); fillCovariance(); double[] d = new double[m_numAttribs]; double[][] v = new double[m_numAttribs][m_numAttribs]; Matrix corr = new Matrix(m_correlation); corr.eigenvalueDecomposition(v, d); m_eigenvectors = (double[][]) v.clone(); m_eigenvalues = (double[]) d.clone(); /*for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < m_numAttribs; j++) { System.err.println(v[i][j] + " "); } System.err.println(d[i]); } */ // any eigenvalues less than 0 are not worth anything --- change to 0 for (int i = 0; i < m_eigenvalues.length; i++) { if (m_eigenvalues[i] < 0) { m_eigenvalues[i] = 0.0; } } m_sortedEigens = Utils.sort(m_eigenvalues); m_sumOfEigenValues = Utils.sum(m_eigenvalues); m_transformedFormat = setOutputFormat(); if (m_transBackToOriginal) { m_originalSpaceFormat = setOutputFormatOriginal(); // new ordered eigenvector matrix int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes() : m_transformedFormat.numAttributes() - 1; double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1]; // try converting back to the original space for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { for (int j = 0; j < m_numAttribs; j++) { orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]]; } } // transpose the matrix int nr = orderedVectors.length; int nc = orderedVectors[0].length; m_eTranspose = new double[nc][nr]; for (int i = 0; i < nc; i++) { for (int j = 0; j < nr; j++) { m_eTranspose[i][j] = orderedVectors[j][i]; } } } }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set the format for the transformed data * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set *///from w w w. j av a2 s . c om private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; FastVector attributes = new FastVector(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_inds[j] = j; } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append( Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } m_outputNumAtts = outputFormat.numAttributes(); return outputFormat; }
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
/** * Returns the number of distinct values of a given attribute. The value * 'missing' is not counted.//from w ww. j ava2 s. co m * * @param attIndex * the attribute (index starts with 0) * @return the number of distinct values of a given attribute */ // @ requires 0 <= attIndex; // @ requires attIndex < numAttributes(); public/* @pure@ */int numDistinctValues(int attIndex) { if (attribute(attIndex).isNumeric()) { double[] attVals = attributeToDoubleArray(attIndex); int[] sorted = Utils.sort(attVals); double prev = 0; int counter = 0; for (int i = 0; i < sorted.length; i++) { ProcessInstance current = getInstance(sorted[i]); if (current.isMissing(attIndex)) { break; } if ((i == 0) || (current.value(attIndex) > prev)) { prev = current.value(attIndex); counter++; } } return counter; } //same for string values String[] strAttVals = attributeToStringArray(attIndex); int[] sorted = new int[strAttVals.length]; for (int i = 0; i < strAttVals.length; i++) { sorted[i] = i; } String prev = ""; int counter = 0; for (int i = 0; i < sorted.length; i++) { ProcessInstance current = getInstance(sorted[i]); if (current.isMissing(attIndex)) { break; } if ((i == 0) || (!current.stringValue(attIndex).equals(prev))) { prev = current.stringValue(attIndex); counter++; } } return counter; }
From source file:faster_pca.faster_pca.java
License:Open Source License
/** * Initializes the filter with the given input data. * * @param instances the data to process//from w w w . j av a2s.c om * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected void setup(Instances instances) throws Exception { int i; int j; Vector<Integer> deleteCols; int[] todelete; double[][] v; Matrix corr; EigenvalueDecomposition eig; Matrix V; m_TrainInstances = new Instances(instances); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_TrainCopy = new Instances(m_TrainInstances, 0); /*m_ReplaceMissingFilter = new ReplaceMissingValues(); m_ReplaceMissingFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);*/ m_NominalToBinaryFilter = new NominalToBinary(); m_NominalToBinaryFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter); // delete any attributes with only one distinct value or are all missing deleteCols = new Vector<Integer>(); /*for (i = 0; i < m_TrainInstances.numAttributes(); i++) { if (m_TrainInstances.numDistinctValues(i) <= 1) { deleteCols.addElement(i); } }*/ if (m_TrainInstances.classIndex() >= 0) { // get rid of the class column m_HasClass = true; m_ClassIndex = m_TrainInstances.classIndex(); deleteCols.addElement(new Integer(m_ClassIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_AttributeFilter = new Remove(); todelete = new int[deleteCols.size()]; for (i = 0; i < deleteCols.size(); i++) { todelete[i] = (deleteCols.elementAt(i)).intValue(); } m_AttributeFilter.setAttributeIndicesArray(todelete); m_AttributeFilter.setInvertSelection(false); m_AttributeFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter); } // can evaluator handle the processed data ? e.g., enough attributes? getCapabilities().testWithFail(m_TrainInstances); m_NumInstances = m_TrainInstances.numInstances(); m_NumAttribs = m_TrainInstances.numAttributes(); // fillCorrelation(); fillCovariance(); // get eigen vectors/values corr = new Matrix(m_Correlation); eig = corr.eig(); V = eig.getV(); v = new double[m_NumAttribs][m_NumAttribs]; for (i = 0; i < v.length; i++) { for (j = 0; j < v[0].length; j++) { v[i][j] = V.get(i, j); } } m_Eigenvectors = v.clone(); m_Eigenvalues = eig.getRealEigenvalues().clone(); // any eigenvalues less than 0 are not worth anything --- change to 0 for (i = 0; i < m_Eigenvalues.length; i++) { if (m_Eigenvalues[i] < 0) { m_Eigenvalues[i] = 0.0; } } m_SortedEigens = Utils.sort(m_Eigenvalues); m_SumOfEigenValues = Utils.sum(m_Eigenvalues); m_TransformedFormat = determineOutputFormat(m_TrainInstances); setOutputFormat(m_TransformedFormat); m_TrainInstances = null; }