org.openml.webapplication.fantail.dc.statistical.Statistical.java Source code

Java tutorial

Introduction

Here is the source code for org.openml.webapplication.fantail.dc.statistical.Statistical.java

Source

/*
 *  Webapplication - Java library that runs on OpenML servers
 *  Copyright (C) 2014 
 *  @author Jan N. van Rijn (j.n.van.rijn@liacs.leidenuniv.nl)
 *  @author Quan Sun (quan.sun.nz@gmail.com)
 *  
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *  
 */
package org.openml.webapplication.fantail.dc.statistical;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.math3.stat.StatUtils;
import org.openml.webapplication.fantail.dc.Characterizer;

import weka.core.Instance;
import weka.core.Instances;

public class Statistical extends Characterizer {

    private List<Double> meanList = new ArrayList<Double>();
    private List<Double> stdDevList = new ArrayList<Double>();
    private List<Double> kurtosisList = new ArrayList<Double>();
    private List<Double> skewnessList = new ArrayList<Double>();

    protected final String[] ids = new String[] { "MeanMeansOfNumericAtts", "MeanStdDevOfNumericAtts",
            "MeanKurtosisOfNumericAtts", "MeanSkewnessOfNumericAtts",

            "MinMeansOfNumericAtts", "MinStdDevOfNumericAtts", "MinKurtosisOfNumericAtts",
            "MinSkewnessOfNumericAtts",

            "MaxMeansOfNumericAtts", "MaxStdDevOfNumericAtts", "MaxKurtosisOfNumericAtts",
            "MaxSkewnessOfNumericAtts",

            "Quartile1MeansOfNumericAtts", "Quartile1StdDevOfNumericAtts", "Quartile1KurtosisOfNumericAtts",
            "Quartile1SkewnessOfNumericAtts",

            "Quartile2MeansOfNumericAtts", "Quartile2StdDevOfNumericAtts", "Quartile2KurtosisOfNumericAtts",
            "Quartile2SkewnessOfNumericAtts",

            "Quartile3MeansOfNumericAtts", "Quartile3StdDevOfNumericAtts", "Quartile3KurtosisOfNumericAtts",
            "Quartile3SkewnessOfNumericAtts" };

    @Override
    public String[] getIDs() {
        return ids;
    }

    @Override
    public Map<String, Double> characterize(Instances instances) {
        int attrib_count = instances.numAttributes() - 1, numeric_count = 0;

        for (int i = 0; i < attrib_count; i++) {
            if (instances.attribute(i).isNumeric()) {
                numeric_count++;
                final double mean = instances.meanOrMode(i);
                final double stddev = Math.sqrt(instances.variance(i));
                final double kurtosis = findKurtosis(instances, mean, stddev, i);
                final double skewness = findSkewness(instances, mean, stddev, i);

                meanList.add(mean);
                stdDevList.add(stddev);
                kurtosisList.add(kurtosis);
                skewnessList.add(skewness);
            }
        }

        if (0 == numeric_count) {

            Map<String, Double> qualities = new HashMap<String, Double>();
            qualities.put(ids[0], 0.0);
            qualities.put(ids[1], 0.0);
            qualities.put(ids[2], 0.0);
            qualities.put(ids[3], 0.0);

            qualities.put(ids[4], 0.0);
            qualities.put(ids[5], 0.0);
            qualities.put(ids[6], 0.0);
            qualities.put(ids[7], 0.0);

            qualities.put(ids[8], 0.0);
            qualities.put(ids[9], 0.0);
            qualities.put(ids[10], 0.0);
            qualities.put(ids[11], 0.0);

            qualities.put(ids[12], 0.0);
            qualities.put(ids[13], 0.0);
            qualities.put(ids[14], 0.0);
            qualities.put(ids[15], 0.0);

            qualities.put(ids[16], 0.0);
            qualities.put(ids[17], 0.0);
            qualities.put(ids[18], 0.0);
            qualities.put(ids[19], 0.0);

            qualities.put(ids[20], 0.0);
            qualities.put(ids[21], 0.0);
            qualities.put(ids[22], 0.0);
            qualities.put(ids[23], 0.0);
            return qualities;
        } else {
            double[] meansArray = ArrayUtils.toPrimitive(meanList.toArray(new Double[numeric_count]));
            double[] stdDevsArray = ArrayUtils.toPrimitive(stdDevList.toArray(new Double[numeric_count]));
            double[] kurtosisArray = ArrayUtils.toPrimitive(kurtosisList.toArray(new Double[numeric_count]));
            double[] skewnessArray = ArrayUtils.toPrimitive(skewnessList.toArray(new Double[numeric_count]));

            Map<String, Double> qualities = new HashMap<String, Double>();
            qualities.put(ids[0], StatUtils.mean(meansArray));
            qualities.put(ids[1], StatUtils.mean(stdDevsArray));
            qualities.put(ids[2], StatUtils.mean(kurtosisArray));
            qualities.put(ids[3], StatUtils.mean(skewnessArray));

            qualities.put(ids[4], StatUtils.min(meansArray));
            qualities.put(ids[5], StatUtils.min(stdDevsArray));
            qualities.put(ids[6], StatUtils.min(kurtosisArray));
            qualities.put(ids[7], StatUtils.min(skewnessArray));

            qualities.put(ids[8], StatUtils.max(meansArray));
            qualities.put(ids[9], StatUtils.max(stdDevsArray));
            qualities.put(ids[10], StatUtils.max(kurtosisArray));
            qualities.put(ids[11], StatUtils.max(skewnessArray));

            qualities.put(ids[12], StatUtils.percentile(meansArray, 25));
            qualities.put(ids[13], StatUtils.percentile(stdDevsArray, 25));
            qualities.put(ids[14], StatUtils.percentile(kurtosisArray, 25));
            qualities.put(ids[15], StatUtils.percentile(skewnessArray, 25));

            qualities.put(ids[16], StatUtils.percentile(meansArray, 50));
            qualities.put(ids[17], StatUtils.percentile(stdDevsArray, 50));
            qualities.put(ids[18], StatUtils.percentile(kurtosisArray, 50));
            qualities.put(ids[19], StatUtils.percentile(skewnessArray, 50));

            qualities.put(ids[20], StatUtils.percentile(meansArray, 75));
            qualities.put(ids[21], StatUtils.percentile(stdDevsArray, 75));
            qualities.put(ids[22], StatUtils.percentile(kurtosisArray, 75));
            qualities.put(ids[23], StatUtils.percentile(skewnessArray, 75));
            return qualities;
        }
    }

    private static double findKurtosis(Instances instances, double mean, double stddev, int attrib) {
        final double S4 = Math.pow(stddev, 4), YBAR = mean;
        double sum = 0.0;
        final int COUNT = instances.numInstances();
        int n = 0;

        if (S4 == 0) {
            return 0;
        }

        for (int i = 0; i < COUNT; i++) {
            Instance instance = instances.instance(i);
            if (!instance.isMissing(attrib)) {
                n++;
                sum += Math.pow(instance.value(attrib) - YBAR, 4);
            }
        }

        return (sum / ((n - 1) * S4)) - 3;
    }

    private static double findSkewness(Instances instances, double mean, double stddev, int attrib) {
        final double S3 = Math.pow(stddev, 3), YBAR = mean;
        double sum = 0.0;
        final int COUNT = instances.numInstances();
        int n = 0;

        if (S3 == 0) {
            return 0;
        }

        for (int i = 0; i < COUNT; i++) {
            Instance instance = instances.instance(i);
            if (!instance.isMissing(attrib)) {
                n++;
                sum += Math.pow(instance.value(attrib) - YBAR, 3);
            }
        }

        return (sum / ((n - 1) * S3));
    }
}