Computes the similarity of the two vectors. - Java java.lang

Java examples for java.lang:Math Vector

Description

Computes the similarity of the two vectors.

Demo Code

/** A collection of mathematical utility functions.
 * <p>
 * Copyright (c) 2008 Eric Eaton
 * <p>
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * <p>
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * <p>
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/.
 * 
 * @author Eric Eaton (EricEaton@umbc.edu) <br>
 *         University of Maryland Baltimore County
 * 
 * @version 0.1
 *
 */
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Random;

public class Main{
    final static double LOG2 = Math.log(2);
    /**
     * Computes the similarity of the two vectors.
     * @param targetV the target vector that forms the basis for comparison.
     * @param v the vector for comparison
     * @param similarityMetric the metric to use for computing the similarity
     * @return The similarity of the two models.
     */
    public static double computeSimilarity(int[] targetV, int[] v,
            SimilarityMetric similarityMetric) {

        switch (similarityMetric) {

        case CORRELATION:
            return MathUtils.correlation(targetV, v);

        case MUTUAL_INFORMATION:
            return MathUtils.mutualInformation(targetV, v);

        case ACCURACY:
            return MathUtils.pairwiseAgreement(targetV, v);

        }

        return Double.NaN;

    }
    /** Computes the correlation between two arrays of the same length, p and q.
     * Computes the correlation between p and q as
     * r = (|p| * \sum_i(p[i]*q[i]) - \sum_i(p[i]) * \sum_i(q[i]))/
     *      sqrt((|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2) *
     *           (|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2))
     * This correlation can be tested for statistical significance via t-tests. 
     * See e.g.: http://www.socialresearchmethods.net/kb/statcorr.htm
     * @return The correlation between the elements of the two arrays.
     */
    public static double correlation(int[] p, int[] q) {

        if (p == null || q == null) {
            throw new IllegalArgumentException("p and q cannot be null");
        }
        if (p.length != q.length) {
            throw new IllegalArgumentException(
                    "p and q must be the same length");
        }

        // compute the sums and squared sums
        int sumP = 0;
        int sumQ = 0;
        int sumPSquared = 0;
        int sumQSquared = 0;
        int sumPQ = 0;
        for (int i = 0; i < p.length; i++) {
            sumP += p[i];
            sumQ += q[i];
            sumPSquared += p[i] * p[i];
            sumQSquared += q[i] * q[i];
            sumPQ += p[i] * q[i];
        }

        // compute the correlation
        double r = ((double) (p.length * sumPQ - sumP * sumQ))
                / Math.sqrt(((long) (p.length * sumPSquared - sumP * sumP))
                        * ((long) (p.length * sumQSquared - sumQ * sumQ)));

        return r;
    }
    /** Computes the correlation between two arrays of the same length, p and q.
     * Computes the correlation between p and q as
     * r = (|p| * \sum_i(p[i]*q[i]) - \sum_i(p[i]) * \sum_i(q[i]))/
     *      sqrt((|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2) *
     *           (|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2))
     * This correlation can be tested for statistical significance via t-tests. 
     * See e.g.: http://www.socialresearchmethods.net/kb/statcorr.htm
     * @return The correlation between the elements of the two arrays.
     */
    public static double correlation(double[] p, double[] q) {

        if (p == null || q == null) {
            throw new IllegalArgumentException("p and q cannot be null");
        }
        if (p.length != q.length) {
            throw new IllegalArgumentException(
                    "p and q must be the same length");
        }

        // compute the sums and squared sums
        double sumP = 0;
        double sumQ = 0;
        double sumPSquared = 0;
        double sumQSquared = 0;
        double sumPQ = 0;
        for (int i = 0; i < p.length; i++) {
            sumP += p[i];
            sumQ += q[i];
            sumPSquared += p[i] * p[i];
            sumQSquared += q[i] * q[i];
            sumPQ += p[i] * q[i];
        }

        // compute the correlation
        double r = (p.length * sumPQ - sumP * sumQ)
                / Math.sqrt((p.length * sumPSquared - sumP * sumP)
                        * (p.length * sumQSquared - sumQ * sumQ));

        return r;
    }
    /** Computes the mutual information between two vectors.
     * @param p the first vector.
     * @param q the second vector.
     * @return the mutual information between p and q.
     */
    public static double mutualInformation(int[] p, int[] q) {
        double[][] confusionMatrix = getConfusionMatrix(p, q);

        // get the row and col sums of the confusion matrix
        double[] rowsum = new double[confusionMatrix.length];
        double[] colsum = new double[confusionMatrix.length];
        for (int i = 0; i < confusionMatrix.length; i++) {
            for (int j = 0; j < confusionMatrix.length; j++) {
                rowsum[i] += confusionMatrix[i][j];
                colsum[j] += confusionMatrix[i][j];
            }
        }

        // compute the mutual information
        double mutualInformation = 0;
        for (int i = 0; i < confusionMatrix.length; i++) {
            for (int j = 0; j < confusionMatrix.length; j++) {
                double deltaMI = 0;
                // if entry is not 0, then the deltaMI shouldn't be 0
                if (confusionMatrix[i][j] != 0) {
                    deltaMI = confusionMatrix[i][j]
                            * log2(confusionMatrix[i][j]
                                    / (rowsum[i] * colsum[j]));
                }
                if (Double.isNaN(deltaMI)) {
                    throw new IllegalStateException("MI is NaN!");
                }
                mutualInformation += deltaMI;
            }
        }

        return mutualInformation;
    }
    /** Computes the pairwise agreement between two pairwise arrays of labelings.
     * The pairwise agreement is the number-of-pairs-in-agreement / the-total-number-of-pairs.
     * The two arrays must be the same length.
     * @param p An array of labels.
     * @param q An array of labels.
     * @return The pairwise agreement between the labelings in p and q.
     */
    public static double pairwiseAgreement(int[] p, int[] q) {

        if (p == null || q == null) {
            throw new IllegalArgumentException("p and q cannot be null");
        }
        if (p.length != q.length) {
            throw new IllegalArgumentException(
                    "p and q must be the same length");
        }

        int numSamePairs = 0;
        for (int i = 0; i < p.length; i++) {
            if (p[i] == q[i])
                numSamePairs++;
        }

        return ((double) numSamePairs) / p.length;
    }
    /** Computes the normalized confusion matrix for two vectors.
     * @param p the first vector
     * @param q the second vector
     * @return the normalized confusion matrix for p and q
     */
    public static double[][] getConfusionMatrix(int[] p, int[] q) {

        if (p.length != q.length) {
            throw new IllegalArgumentException(
                    "p and q must be the same length.");
        }

        int[] classes = uniqueValues(append(p, q));
        int n = p.length;

        // compute the confusion matrix
        double[][] confusionMatrix = new double[classes.length][classes.length];
        for (int i = 0; i < n; i++) {
            // determine the classIdx of p[i]
            int piClassIdx;
            for (piClassIdx = 0; piClassIdx < classes.length; piClassIdx++) {
                if (p[i] == classes[piClassIdx])
                    break;
            }
            // determine the classIdx of q[i]
            int qiClassIdx;
            for (qiClassIdx = 0; qiClassIdx < classes.length; qiClassIdx++) {
                if (q[i] == classes[qiClassIdx])
                    break;
            }
            // increment the counter in the confusion matrix
            confusionMatrix[piClassIdx][qiClassIdx]++;
        }

        // normalize the confusion matrix
        for (int i = 0; i < confusionMatrix.length; i++) {
            for (int j = 0; j < confusionMatrix.length; j++) {
                confusionMatrix[i][j] /= n;
            }
        }

        return confusionMatrix;
    }
    /** Computes the log-base-2 of a number.
     * @param d
     * @return the log-base-2 of d
     */
    public static double log2(double d) {
        return Math.log(d) / LOG2;
    }
    /** Determines the unique values of v.  The values are returned in no particular order.
     * @param v
     * @return the unique values of v in no particular order.
     */
    public static int[] uniqueValues(int[] v) {
        // form the values into a set, which automatically removes duplicates
        HashSet<Integer> uniqueValues = new HashSet<Integer>();
        for (int i = 0; i < v.length; i++) {
            uniqueValues.add(v[i]);
        }
        // convert the set back into an array
        int[] vUnique = new int[uniqueValues.size()];
        int i = 0;
        for (Integer uniqueValue : uniqueValues) {
            vUnique[i++] = uniqueValue;
        }
        return vUnique;
    }
    /** Determines the unique values of v.  The values are returned in no particular order.
     * @param v
     * @return the unique values of v in no particular order.
     */
    public static double[] uniqueValues(double[] v) {
        // form the values into a set, which automatically removes duplicates
        HashSet<Double> uniqueValues = new HashSet<Double>();
        for (int i = 0; i < v.length; i++) {
            uniqueValues.add(v[i]);
        }
        // convert the set back into an array
        double[] vUnique = new double[uniqueValues.size()];
        int i = 0;
        for (Double uniqueValue : uniqueValues) {
            vUnique[i++] = uniqueValue;
        }
        return vUnique;
    }
    /** Appends an element to a vector.
     * @param v1 the vector.
     * @param d the element to append.
     * @return A vector containing all the elements of v1 followed
     * by d.
     */
    public static int[] append(int[] v1, int d) {
        int[] newVector = new int[v1.length + 1];
        System.arraycopy(v1, 0, newVector, 0, v1.length);
        newVector[v1.length] = d;
        return newVector;
    }
    /** Appends an element to a vector.
     * @param v1 the vector.
     * @param d the element to append.
     * @return A vector containing all the elements of v1 followed
     * by d.
     */
    public static double[] append(double[] v1, double d) {
        double[] newVector = new double[v1.length + 1];
        System.arraycopy(v1, 0, newVector, 0, v1.length);
        newVector[v1.length] = d;
        return newVector;
    }
    /** Appends two vectors.
     * @param v1 the first vector.
     * @param v2 the second vector.
     * @return A vector containing all the elements of v1 followed
     * by all the elements of v2.
     */
    public static double[] append(double[] v1, double[] v2) {
        double[] newVector = new double[v1.length + v2.length];
        System.arraycopy(v1, 0, newVector, 0, v1.length);
        System.arraycopy(v2, 0, newVector, v1.length, v2.length);
        return newVector;
    }
    /** Appends two vectors.
     * @param v1 the first vector.
     * @param v2 the second vector.
     * @return A vector containing all the elements of v1 followed
     * by all the elements of v2.
     */
    public static int[] append(int[] v1, int[] v2) {
        int[] newVector = new int[v1.length + v2.length];
        System.arraycopy(v1, 0, newVector, 0, v1.length);
        System.arraycopy(v2, 0, newVector, v1.length, v2.length);
        return newVector;
    }
}

Related Tutorials