List of usage examples for weka.core.Utils.correlation
public static final double correlation(double y1[], double y2[], int n)
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Fill the correlation matrix/* www . j a va 2s . c o m*/ */ private void fillCorrelation() throws Exception { m_correlation = new double[m_numAttribs][m_numAttribs]; double[] att1 = new double[m_numInstances]; double[] att2 = new double[m_numInstances]; double corr; for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < m_numAttribs; j++) { for (int k = 0; k < m_numInstances; k++) { att1[k] = m_trainInstances.instance(k).value(i); att2[k] = m_trainInstances.instance(k).value(j); } if (i == j) { m_correlation[i][j] = 1.0; // store the standard deviation m_stdDevs[i] = Math.sqrt(Utils.variance(att1)); } else { corr = Utils.correlation(att1, att2, m_numInstances); m_correlation[i][j] = corr; m_correlation[j][i] = corr; } } } // now standardize the input data m_standardizeFilter = new Standardize(); m_standardizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter); }
From source file:faster_pca.faster_pca.java
License:Open Source License
/** * Modified version of PrincipalComponents.fillCorrelation() * @throws Exception // w w w . j a v a 2 s. co m */ protected void fillCorrelation() throws Exception { int i; int j; int k; double[] att1; double[] att2; double corr; m_Correlation = new double[m_NumAttribs][m_NumAttribs]; att1 = new double[m_NumInstances]; att2 = new double[m_NumInstances]; double trainInstancesCopy[][] = new double[m_NumAttribs][m_NumInstances]; for (i = 0; i < m_NumInstances; i++) { Instance in = m_TrainInstances.instance(i); Enumeration enumer = in.enumerateAttributes(); for (j = 0; j < m_NumAttribs; j++) { trainInstancesCopy[j][i] = in.value(j); } } for (i = 0; i < m_NumAttribs; i++) { for (j = 0; j <= i; j++) { /*for (k = 0; k < m_NumInstances; k++) { //att1[k] = m_TrainInstances.instance(k).value(i); att1[k] = trainInstancesCopy[i][k]; //att2[k] = m_TrainInstances.instance(k).value(j); att2[k] = trainInstancesCopy[j][k]; }*/ if (i == j) { m_Correlation[i][j] = 1.0; } else { corr = Utils.correlation(trainInstancesCopy[i], trainInstancesCopy[j], m_NumInstances); m_Correlation[i][j] = corr; m_Correlation[j][i] = corr; } } } // now standardize the input data /*m_standardizeFilter = new Standardize(); m_standardizeFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);*/ //todo: see if this line actually needs called double mins[] = new double[m_NumAttribs]; double maxs[] = new double[m_NumAttribs]; for (j = 0; j < m_NumAttribs; j++) { mins[j] = Double.MAX_VALUE; maxs[j] = Double.MIN_VALUE; for (i = 0; i < m_NumInstances; i++) { double val = trainInstancesCopy[j][i]; if (val < mins[j]) mins[j] = val; if (val > maxs[j]) maxs[j] = val; } } f_norm = new fast_normalize(mins, maxs); }
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Builds the correlation attribute evaluator: fills {@code m_correlations}
 * with one score per attribute, computed with {@code Utils.correlation}
 * against the class attribute. Works on a copy of the data from which
 * instances with a missing class are deleted and which is passed through a
 * ReplaceMissingValues filter.
 *
 * <p>Nominal attributes (and a nominal class) are binarized into one 0/1
 * indicator vector per value. The score of an attribute is then a weighted
 * average of the absolute correlations of those indicator vectors, weighted
 * by how often each value occurs. When {@code m_detailedOutput} is set, the
 * per-value scores of nominal attributes are also written to
 * {@code m_detailedOutputBuff}.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
  // Work on a copy so the caller's data set is not modified.
  data = new Instances(data);
  data.deleteWithMissingClass();

  ReplaceMissingValues rmv = new ReplaceMissingValues();
  rmv.setInputFormat(data);
  data = Filter.useFilter(data, rmv);

  int numClasses = data.classAttribute().numValues();
  int classIndex = data.classIndex();
  int numInstances = data.numInstances();
  m_correlations = new double[data.numAttributes()];
  /*
   * boolean hasNominals = false; boolean hasNumerics = false;
   */
  // Attribute indices (class excluded), split by attribute type.
  List<Integer> numericIndexes = new ArrayList<Integer>();
  List<Integer> nominalIndexes = new ArrayList<Integer>();
  if (m_detailedOutput) {
    m_detailedOutputBuff = new StringBuffer();
  }

  // TODO for instance weights (folded into computing weighted correlations)
  // add another dimension just before the last [2] (0 for 0/1 binary vector
  // and
  // 1 for corresponding instance weights for the 1's)

  // nomAtts[attribute][value][instance]: indicator vectors for the nominal
  // attributes; entries for non-nominal attributes and the class stay null.
  double[][][] nomAtts = new double[data.numAttributes()][][];
  for (int i = 0; i < data.numAttributes(); i++) {
    if (data.attribute(i).isNominal() && i != classIndex) {
      nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
      // Start by marking every instance as having value 0; the loop below
      // moves the mark to the value the instance actually stores.
      Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                       // 1's
      nominalIndexes.add(i);
    } else if (data.attribute(i).isNumeric() && i != classIndex) {
      numericIndexes.add(i);
    }
  }

  // do the nominal attributes
  if (nominalIndexes.size() > 0) {
    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      // NOTE(review): presumably numValues()/index(j)/valueSparse(j) walk only
      // the explicitly stored values of a sparse instance - confirm.
      for (int j = 0; j < current.numValues(); j++) {
        if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
          // Will need to check for zero in case this isn't a sparse
          // instance (unless we add 1 and subtract 1)
          // If the stored value is 0, the +1 and -1 below hit the same cell
          // and cancel, leaving the initial 1 in place.
          nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
          nomAtts[current.index(j)][0][i] -= 1;
        }
      }
    }
  }

  if (data.classAttribute().isNumeric()) {
    double[] classVals = data.attributeToDoubleArray(classIndex);

    // do the numeric attributes
    for (Integer i : numericIndexes) {
      double[] numAttVals = data.attributeToDoubleArray(i);
      m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

      if (m_correlations[i] == 1.0) {
        // check for zero variance (useless numeric attribute)
        if (Utils.variance(numAttVals) == 0) {
          m_correlations[i] = 0;
        }
      }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {

      // now compute the correlations for the binarized nominal attributes
      for (Integer i : nominalIndexes) {
        double sum = 0; // total of the indicator sums over all values
        double corr = 0;
        double sumCorr = 0; // absolute correlations weighted by value frequency
        double sumForValue = 0; // sum of the indicator vector for value j

        if (m_detailedOutput) {
          m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
        }

        for (int j = 0; j < data.attribute(i).numValues(); j++) {
          sumForValue = Utils.sum(nomAtts[i][j]);
          corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

          // useless attribute - all instances have the same value
          if (sumForValue == numInstances || sumForValue == 0) {
            corr = 0;
          }
          // only the magnitude of the correlation is used
          if (corr < 0.0) {
            corr = -corr;
          }
          sumCorr += sumForValue * corr;
          sum += sumForValue;

          if (m_detailedOutput) {
            m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
            m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
          }
        }
        // frequency-weighted average over the values; 0 when sum is not positive
        m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
      }
    }
  } else {
    // class is nominal
    // TODO extra dimension for storing instance weights too
    // binarizedClasses[classValue][instance] is 1 where the instance has that
    // class value and 0 elsewhere.
    double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

    // this is equal to the number of instances for all inst weights = 1
    double[] classValCounts = new double[data.classAttribute().numValues()];

    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      binarizedClasses[(int) current.classValue()][i] = 1;
    }
    for (int i = 0; i < data.classAttribute().numValues(); i++) {
      classValCounts[i] = Utils.sum(binarizedClasses[i]);
    }

    double sumClass = Utils.sum(classValCounts);

    // do numeric attributes first
    if (numericIndexes.size() > 0) {
      for (Integer i : numericIndexes) {
        double[] numAttVals = data.attributeToDoubleArray(i);
        double corr = 0;
        double sumCorr = 0; // absolute correlations weighted by class frequency

        for (int j = 0; j < data.classAttribute().numValues(); j++) {
          corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
          if (corr < 0.0) {
            corr = -corr;
          }

          if (corr == 1.0) {
            // check for zero variance (useless numeric attribute)
            if (Utils.variance(numAttVals) == 0) {
              corr = 0;
            }
          }

          sumCorr += classValCounts[j] * corr;
        }
        // NOTE(review): not guarded against sumClass == 0, unlike the nominal
        // branch below which guards its final division.
        m_correlations[i] = sumCorr / sumClass;
      }
    }

    if (nominalIndexes.size() > 0) {
      for (Integer i : nominalIndexes) {
        if (m_detailedOutput) {
          m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
        }

        double sumForAtt = 0; // total of the indicator sums over all values
        double corrForAtt = 0; // per-value averages weighted by value frequency
        for (int j = 0; j < data.attribute(i).numValues(); j++) {
          double sumForValue = Utils.sum(nomAtts[i][j]);
          double corr = 0;
          double sumCorr = 0;
          double avgCorrForValue = 0;

          sumForAtt += sumForValue;
          for (int k = 0; k < numClasses; k++) {

            // corr between value j and class k
            corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);

            // useless attribute - all instances have the same value
            if (sumForValue == numInstances || sumForValue == 0) {
              corr = 0;
            }
            if (corr < 0.0) {
              corr = -corr;
            }
            sumCorr += classValCounts[k] * corr;
          }
          // class-frequency-weighted average correlation for value j
          avgCorrForValue = sumCorr / sumClass;
          corrForAtt += sumForValue * avgCorrForValue;

          if (m_detailedOutput) {
            m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
            m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
          }
        }

        // the weighted average corr for att i as
        // a whole (weighted by value frequencies)
        m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
      }
    }
  }

  // Terminate the detailed output with a newline if anything was written.
  if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
    m_detailedOutputBuff.append("\n");
  }
}
From source file:mlda.attributes.AvgAbsoluteCorrelationBetweenNumericAttributes.java
License:Open Source License
/** * Calculate metric value/* w w w . jav a2s .c o m*/ * * @param mlData Multi-label dataset to which calculate the metric * @return Value of the metric */ public double calculate(MultiLabelInstances mlData) { Instances instances = mlData.getDataSet(); int numInstances = mlData.getNumInstances(); double res = 0.0; int count = 0; int[] featureIndices = mlData.getFeatureIndices(); Vector<Integer> numericFeatureIndices = new Vector<>(); for (int fIndex : featureIndices) { if (instances.attribute(fIndex).isNumeric()) { numericFeatureIndices.add(fIndex); } } if (numericFeatureIndices.size() <= 0) { return Double.NaN; } double[][] attributesToDoubleArray = new double[numericFeatureIndices.size()][numInstances]; for (int fIndex : numericFeatureIndices) { attributesToDoubleArray[fIndex] = instances.attributeToDoubleArray(fIndex); } for (int fIndex1 : numericFeatureIndices) { for (int fIndex2 = fIndex1 + 1; fIndex2 < numericFeatureIndices.size(); fIndex2++) { count++; res += Utils.correlation(attributesToDoubleArray[fIndex1], attributesToDoubleArray[fIndex2], numInstances); } } if (count > 0) { this.value = res / count; } else { this.value = Double.NaN; } //this.value = res/count; return value; }