Example usage for weka.core Utils correlation

List of usage examples for weka.core Utils correlation

Introduction

On this page you can find example usages of weka.core.Utils.correlation.

Prototype

public static final double correlation(double y1[], double y2[], int n) 

Source Link

Document

Returns the correlation coefficient of two double vectors.

Usage

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Fill the correlation matrix/*  www .  j a  va 2s  . c o m*/
 */
private void fillCorrelation() throws Exception {
    m_correlation = new double[m_numAttribs][m_numAttribs];
    double[] att1 = new double[m_numInstances];
    double[] att2 = new double[m_numInstances];
    double corr;

    for (int i = 0; i < m_numAttribs; i++) {
        for (int j = 0; j < m_numAttribs; j++) {
            for (int k = 0; k < m_numInstances; k++) {
                att1[k] = m_trainInstances.instance(k).value(i);
                att2[k] = m_trainInstances.instance(k).value(j);
            }
            if (i == j) {
                m_correlation[i][j] = 1.0;
                // store the standard deviation
                m_stdDevs[i] = Math.sqrt(Utils.variance(att1));
            } else {
                corr = Utils.correlation(att1, att2, m_numInstances);
                m_correlation[i][j] = corr;
                m_correlation[j][i] = corr;
            }
        }
    }

    // now standardize the input data
    m_standardizeFilter = new Standardize();
    m_standardizeFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter);
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
 * Modified version of PrincipalComponents.fillCorrelation().
 *
 * Copies the training data into an attribute-major array, fills the
 * symmetric correlation matrix {@code m_Correlation} from it, and then
 * collects the per-attribute minimum and maximum, which are handed to a new
 * {@code fast_normalize} stored in {@code f_norm}.
 *
 * @throws Exception propagated from the calls made here, if any of them fails
 */
protected void fillCorrelation() throws Exception {
    m_Correlation = new double[m_NumAttribs][m_NumAttribs];

    // Attribute-major copy: trainInstancesCopy[attribute][instance]. This lets
    // whole attribute columns be passed straight to Utils.correlation.
    double[][] trainInstancesCopy = new double[m_NumAttribs][m_NumInstances];
    for (int i = 0; i < m_NumInstances; i++) {
        Instance in = m_TrainInstances.instance(i);
        for (int j = 0; j < m_NumAttribs; j++) {
            trainInstancesCopy[j][i] = in.value(j);
        }
    }

    // Only the lower triangle is computed; each value is mirrored.
    for (int i = 0; i < m_NumAttribs; i++) {
        m_Correlation[i][i] = 1.0;
        for (int j = 0; j < i; j++) {
            double corr = Utils.correlation(trainInstancesCopy[i], trainInstancesCopy[j], m_NumInstances);
            m_Correlation[i][j] = corr;
            m_Correlation[j][i] = corr;
        }
    }

    // The Standardize filter step of the original implementation is not
    // applied here; per-attribute min/max are collected instead.
    // TODO: see if the standardize step actually needs to be called.
    double[] mins = new double[m_NumAttribs];
    double[] maxs = new double[m_NumAttribs];

    for (int j = 0; j < m_NumAttribs; j++) {
        mins[j] = Double.MAX_VALUE;
        // Double.MIN_VALUE is the smallest POSITIVE double, so it must not be
        // used as the starting point of a maximum search: attributes whose
        // values are all <= 0 would report a wrong maximum.
        maxs[j] = -Double.MAX_VALUE;
        for (int i = 0; i < m_NumInstances; i++) {
            double val = trainInstancesCopy[j][i];
            if (val < mins[j]) {
                mins[j] = val;
            }
            if (val > maxs[j]) {
                maxs[j] = val;
            }
        }
    }

    f_norm = new fast_normalize(mins, maxs);
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Builds the correlation attribute evaluator: for every non-class attribute
 * a correlation score against the class is computed and stored in
 * {@code m_correlations}. Instances with a missing class value are deleted
 * and the {@code ReplaceMissingValues} filter is applied before any
 * correlation is computed.
 *
 * Nominal attributes (and a nominal class) are turned into one 0/1 indicator
 * vector per value; the score of an attribute is a frequency-weighted average
 * of the absolute correlations of its indicator vectors.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // work on a new Instances object built from the input
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // one score per attribute; the class attribute's slot is never written here
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    // nomAtts[attribute][value][instance] holds the indicator vectors of the
    // nominal attributes; entries for non-nominal attributes stay null
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            // iterate over the values the instance reports via
            // numValues()/index()/valueSparse()
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    // The observed value gets +1 and value 0 gets -1. When the
                    // observed value IS 0 both updates hit the same cell, so
                    // it keeps the 1 it was initialised with.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        // numeric class: correlate directly against the class values
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that take value j of attribute i
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // only the magnitude of the correlation is used
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // average of the per-value correlations, weighted by value frequency
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        // binarizedClasses[classValue][instance] is 1 where the instance has
        // that class value and 0 elsewhere
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                // average over class values, weighted by class value frequency
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // class-frequency-weighted average for value j
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // terminate the detailed output with a newline if anything was written
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:mlda.attributes.AvgAbsoluteCorrelationBetweenNumericAttributes.java

License:Open Source License

/**
 * Calculate metric value/*  w  w w  .  jav a2s  .c o m*/
 * 
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();

    int numInstances = mlData.getNumInstances();

    double res = 0.0;
    int count = 0;

    int[] featureIndices = mlData.getFeatureIndices();

    Vector<Integer> numericFeatureIndices = new Vector<>();
    for (int fIndex : featureIndices) {
        if (instances.attribute(fIndex).isNumeric()) {
            numericFeatureIndices.add(fIndex);
        }
    }

    if (numericFeatureIndices.size() <= 0) {
        return Double.NaN;
    }

    double[][] attributesToDoubleArray = new double[numericFeatureIndices.size()][numInstances];
    for (int fIndex : numericFeatureIndices) {
        attributesToDoubleArray[fIndex] = instances.attributeToDoubleArray(fIndex);
    }

    for (int fIndex1 : numericFeatureIndices) {
        for (int fIndex2 = fIndex1 + 1; fIndex2 < numericFeatureIndices.size(); fIndex2++) {
            count++;
            res += Utils.correlation(attributesToDoubleArray[fIndex1], attributesToDoubleArray[fIndex2],
                    numInstances);
        }
    }

    if (count > 0) {
        this.value = res / count;
    } else {
        this.value = Double.NaN;
    }

    //this.value = res/count;
    return value;
}