Returns the correlation between two arrays of data, ignoring any NaN values (see Wikipedia: Correlation) - Java java.lang

Java examples for java.lang:Math Array Function

Description

Returns the correlation between the two arrays of data, ignoring any NaN values. See Wikipedia: Correlation.

Demo Code

/* From www.java2s.com
 *  Java Information Dynamics Toolkit (JIDT)
 *  Copyright (C) 2012, Joseph T. Lizier
 *  
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//package com.java2s;

public class Main {
    /**
     * <p>Returns the sample (Pearson) correlation coefficient between the two
     *    arrays of data, ignoring any NaN values.</p>
     * <p>An index is used only when <b>both</b> {@code x[i]} and {@code y[i]}
     *    are not NaN; if either is NaN the whole pair is skipped, so the means,
     *    the covariance and both standard deviations are all computed over the
     *    same set of pairs.</p>
     * <p>See - <a href="http://en.wikipedia.org/wiki/Correlation">Wikipedia</a>
     * </p>
     * 
     * @param x first data series
     * @param y second data series
     * @param dataLength - number of terms in each vector to consider (we look at the first dataLength terms).
     *    Precondition: dataLength is at most min(x.length, y.length)
     * @return the correlation coefficient, in the range [-1, 1]; or
     *    {@code Double.NaN} if fewer than two non-NaN pairs are available or
     *    if either series has zero variance over the pairs used
     * @throws ArrayIndexOutOfBoundsException if dataLength exceeds the length
     *    of either array
     */
    public static double correlationIgnoreNans(double[] x, double[] y,
            int dataLength) {
        // First pass: sums (for the means) over the pairs that have no NaN.
        double sumX = 0;
        double sumY = 0;
        int count = 0;
        for (int i = 0; i < dataLength; i++) {
            if (Double.isNaN(x[i]) || Double.isNaN(y[i])) {
                // Skip the whole pair if either value is missing
                continue;
            }
            sumX += x[i];
            sumY += y[i];
            count++;
        }

        // The (count - 1) normalisation below needs at least two pairs;
        // make the undefined result explicit rather than relying on 0/0.
        if (count < 2) {
            return Double.NaN;
        }

        // Adjust for the values we've skipped:
        double meanX = sumX / count;
        double meanY = sumY / count;

        // Second pass: cross products and squared deviations together,
        // since they are taken over exactly the same pairs.
        double c = 0;
        double sumSqsX = 0.0;
        double sumSqsY = 0.0;
        for (int t = 0; t < dataLength; t++) {
            if (Double.isNaN(x[t]) || Double.isNaN(y[t])) {
                continue;
            }
            double devX = x[t] - meanX;
            double devY = y[t] - meanY;
            c += devX * devY;
            sumSqsX += devX * devX;
            sumSqsY += devY * devY;
        }

        double degreesOfFreedom = (double) (count - 1);
        double covariance = c / degreesOfFreedom;
        double stdX = Math.sqrt(sumSqsX / degreesOfFreedom);
        double stdY = Math.sqrt(sumSqsY / degreesOfFreedom);

        // Zero variance in either series gives 0/0 = NaN here.
        double correlation = covariance / stdX / stdY;

        // Floating point rounding can push a (near-)perfect correlation
        // marginally outside [-1, 1]; clamp it back. Math.min/Math.max
        // propagate NaN, so an undefined result is still returned as NaN.
        return Math.max(-1.0, Math.min(1.0, correlation));
    }
}

Related Tutorials