Example usage for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()

List of usage examples for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()

Introduction

On this page you can find example usages of the no-argument constructor of org.apache.commons.math3.stat.descriptive.SummaryStatistics.

Prototype

public SummaryStatistics() 

Source Link

Document

Construct a SummaryStatistics instance

Usage

From source file:org.eclipse.dataset.AbstractCompoundDataset.java

/**
 * Accumulates one {@link SummaryStatistics} per element of the compound item and
 * stores each of them under the key {@code name + elementIndex}.
 * <p>
 * An item is skipped as a whole as soon as one of its elements is rejected by the
 * NaN / infinity filters. Any previously stored values are cleared before the new
 * statistics are stored.
 *
 * @param ignoreNaNs if true, items containing a NaN element are skipped
 * @param ignoreInfs if true, items containing an infinite element are skipped
 * @param name prefix of the keys under which the per-element statistics are stored
 */
@Override
protected void calculateSummaryStats(boolean ignoreNaNs, final boolean ignoreInfs, String name) {
    final IndexIterator it = getIterator();

    final SummaryStatistics[] perElement = new SummaryStatistics[isize];
    for (int e = 0; e < isize; e++) {
        perElement[e] = new SummaryStatistics();
    }

    // scratch buffer holding the elements of the item currently being inspected
    final double[] item = new double[isize];
    items:
    while (it.hasNext()) {
        for (int e = 0; e < isize; e++) {
            final double v = getElementDoubleAbs(it.index + e);
            final boolean rejected = (ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v));
            if (rejected) {
                // one bad element disqualifies the whole item
                continue items;
            }
            item[e] = v;
        }
        for (int e = 0; e < isize; e++) {
            perElement[e].addValue(item[e]);
        }
    }

    // all calculations are done: reset the store and publish the results
    if (storedValues != null) {
        storedValues.clear();
    } else {
        storedValues = new HashMap<String, Object>();
    }

    for (int e = 0; e < isize; e++) {
        storedValues.put(name + e, perElement[e]);
    }
}

From source file:org.eclipse.dataset.AbstractCompoundDataset.java

/**
 * Accumulates count, sum, mean and variance along one axis of a compound dataset
 * and stores the four result datasets under axis-specific keys.
 * <p>
 * An item is left out of the statistics as soon as one of its elements is rejected
 * by the NaN / infinity filters.
 *
 * @param ignoreNaNs if true, items containing a NaN element are skipped
 * @param ignoreInfs if true, items containing an infinite element are skipped
 * @param axis axis along which the statistics are accumulated
 */
@Override
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    final int rank = getRank();

    // the shape array is reused below as the position within the source dataset
    final int[] srcPos = getShape();
    final int axisLength = srcPos[axis];
    srcPos[axis] = 1;

    final int[] resultShape = squeezeShape(srcPos, false);

    final IntegerDataset count = new IntegerDataset(resultShape);
    final CompoundDoubleDataset sum = new CompoundDoubleDataset(isize, resultShape);
    final CompoundDoubleDataset mean = new CompoundDoubleDataset(isize, resultShape);
    final CompoundDoubleDataset var = new CompoundDoubleDataset(isize, resultShape);

    final IndexIterator outIt = count.getIterator(true);
    final int[] outPos = outIt.getPos();
    final double[] buffer = new double[isize];

    while (outIt.hasNext()) {
        // expand the result position into a source position with a gap at the axis
        for (int d = 0; d < axis; d++) {
            srcPos[d] = outPos[d];
        }
        srcPos[axis] = 0;
        for (int d = axis + 1; d < rank; d++) {
            srcPos[d] = outPos[d - 1];
        }

        final SummaryStatistics[] perElement = new SummaryStatistics[isize];
        for (int e = 0; e < isize; e++) {
            perElement[e] = new SummaryStatistics();
        }

        for (int a = 0; a < axisLength; a++) {
            srcPos[axis] = a;
            getDoubleArray(buffer, srcPos);

            boolean accept = true;
            for (int e = 0; e < isize && accept; e++) {
                final double v = buffer[e];
                if ((ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v))) {
                    accept = false;
                }
            }
            if (accept) {
                for (int e = 0; e < isize; e++) {
                    perElement[e].addValue(buffer[e]);
                }
            }
        }

        // every element saw the same number of accepted items, so element 0 gives the count
        count.setAbs(outIt.index, (int) perElement[0].getN());

        // the buffer is reused to hand each statistic over to its result dataset
        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getSum();
        }
        sum.set(buffer, outPos);

        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getMean();
        }
        mean.set(buffer, outPos);

        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getVariance();
        }
        var.set(buffer, outPos);
    }

    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
}

From source file:org.eclipse.dataset.AbstractDataset.java

/**
 * Calculate summary statistics for a dataset
 * <p>
 * When no hash has been stored yet, the same pass also derives a shapeless hash,
 * the maximum/minimum and the positive maximum/minimum and stores them.
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param name key under which the resulting statistics object is stored
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final String name) {
    final IndexIterator it = getIterator();
    final SummaryStatistics summary = new SummaryStatistics();

    // hash already stored: only the statistics object itself has to be built
    if (storedValues != null && storedValues.containsKey(STORE_HASH)) {
        while (it.hasNext()) {
            final double v = getElementDoubleAbs(it.index);
            final boolean dropped = (ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v));
            if (!dropped) {
                summary.addValue(v);
            }
        }

        storedValues.put(name, summary);
        return;
    }

    boolean sawNaN = false;
    double runningHash = 0;
    // NOTE(review): Double.MIN_VALUE is the smallest positive double, so it stays as the
    // "positive maximum" when the data holds no positive value - confirm this is intended
    double positiveMax = Double.MIN_VALUE;
    double positiveMin = Double.POSITIVE_INFINITY;

    while (it.hasNext()) {
        final double v = getElementDoubleAbs(it.index);
        final boolean nan = Double.isNaN(v);
        final boolean inf = !nan && Double.isInfinite(v);

        // non-finite values advance the hash without contributing their value
        final double mixed = (nan || inf) ? runningHash * 19 : runningHash * 19 + v;
        runningHash = mixed % Integer.MAX_VALUE;

        if ((nan && ignoreNaNs) || (inf && ignoreInfs)) {
            continue;
        }
        if (nan) {
            sawNaN = true;
        }

        if (v > 0) {
            positiveMin = Math.min(positiveMin, v);
            positiveMax = Math.max(positiveMax, v);
        }
        summary.addValue(v);
    }

    final int shapelessHash = ((int) runningHash) * 19 + getDtype() * 17 + getElementsPerItem();
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_SHAPELESS_HASH), shapelessHash);

    // a NaN that was not ignored turns all four extrema into NaN
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX),
            sawNaN ? Double.NaN : fromDoubleToNumber(summary.getMax()));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN),
            sawNaN ? Double.NaN : fromDoubleToNumber(summary.getMin()));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_POS_MAX),
            sawNaN ? Double.NaN : fromDoubleToNumber(positiveMax));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_POS_MIN),
            sawNaN ? Double.NaN : fromDoubleToNumber(positiveMin));
    storedValues.put(name, summary);
}

From source file:org.eclipse.dataset.AbstractDataset.java

/**
 * Calculate summary statistics for a dataset along an axis
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param axis/*from w  w  w.j a va 2 s  .c  o  m*/
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    int rank = getRank();

    int[] oshape = getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;

    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    final int dtype = getDtype();
    IntegerDataset count = new IntegerDataset(nshape);
    Dataset max = DatasetFactory.zeros(nshape, dtype);
    Dataset min = DatasetFactory.zeros(nshape, dtype);
    IntegerDataset maxIndex = new IntegerDataset(nshape);
    IntegerDataset minIndex = new IntegerDataset(nshape);
    Dataset sum = DatasetFactory.zeros(nshape, getLargestDType(dtype));
    DoubleDataset mean = new DoubleDataset(nshape);
    DoubleDataset var = new DoubleDataset(nshape);

    IndexIterator qiter = max.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();

    while (qiter.hasNext()) {
        int i = 0;
        for (; i < axis; i++) {
            spos[i] = qpos[i];
        }
        spos[i++] = 0;
        for (; i < rank; i++) {
            spos[i] = qpos[i - 1];
        }

        final SummaryStatistics stats = new SummaryStatistics();
        double amax = Double.NEGATIVE_INFINITY;
        double amin = Double.POSITIVE_INFINITY;
        boolean hasNaNs = false;
        if (ignoreNaNs) {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);

                if (Double.isNaN(val)) {
                    hasNaNs = true;
                    continue;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                }

                if (val > amax) {
                    amax = val;
                }
                if (val < amin) {
                    amin = val;
                }

                stats.addValue(val);
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);

                if (hasNaNs) {
                    if (!Double.isNaN(val))
                        stats.addValue(0);
                    continue;
                }

                if (Double.isNaN(val)) {
                    amax = Double.NaN;
                    amin = Double.NaN;
                    hasNaNs = true;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                } else {
                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }
                }
                stats.addValue(val);
            }
        }

        count.setAbs(qiter.index, (int) stats.getN());

        max.setObjectAbs(qiter.index, amax);
        min.setObjectAbs(qiter.index, amin);
        boolean fmax = false;
        boolean fmin = false;
        if (hasNaNs) {
            if (ignoreNaNs) {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val))
                        continue;

                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            } else {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val)) {
                        maxIndex.setAbs(qiter.index, j);
                        minIndex.setAbs(qiter.index, j);
                        break;
                    }
                }
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (!fmax && val == amax) {
                    maxIndex.setAbs(qiter.index, j);
                    fmax = true;
                    if (fmin)
                        break;
                }
                if (!fmin && val == amin) {
                    minIndex.setAbs(qiter.index, j);
                    fmin = true;
                    if (fmax)
                        break;
                }
            }
        }
        sum.setObjectAbs(qiter.index, stats.getSum());
        mean.setAbs(qiter.index, stats.getMean());
        var.setAbs(qiter.index, stats.getVariance());
    }
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + "-" + axis), max);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + "-" + axis), min);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + STORE_INDEX + "-" + axis), maxIndex);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + STORE_INDEX + "-" + axis), minIndex);
}

From source file:org.eclipse.january.dataset.AbstractCompoundDataset.java

/**
 * Accumulates count, sum, mean and variance along one axis of a compound dataset
 * and stores the four result datasets under axis-specific keys.
 * <p>
 * An item is left out of the statistics as soon as one of its elements is rejected
 * by the NaN / infinity filters.
 *
 * @param ignoreNaNs if true, items containing a NaN element are skipped
 * @param ignoreInfs if true, items containing an infinite element are skipped
 * @param axis axis along which the statistics are accumulated
 */
@Override
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    final int rank = getRank();

    // the shape array is reused below as the position within the source dataset
    final int[] srcPos = getShape();
    final int axisLength = srcPos[axis];
    srcPos[axis] = 1;

    final int[] resultShape = ShapeUtils.squeezeShape(srcPos, false);

    final IntegerDataset count = new IntegerDataset(resultShape);
    final CompoundDoubleDataset sum = new CompoundDoubleDataset(isize, resultShape);
    final CompoundDoubleDataset mean = new CompoundDoubleDataset(isize, resultShape);
    final CompoundDoubleDataset var = new CompoundDoubleDataset(isize, resultShape);

    final IndexIterator outIt = count.getIterator(true);
    final int[] outPos = outIt.getPos();
    final double[] buffer = new double[isize];

    while (outIt.hasNext()) {
        // expand the result position into a source position with a gap at the axis
        for (int d = 0; d < axis; d++) {
            srcPos[d] = outPos[d];
        }
        srcPos[axis] = 0;
        for (int d = axis + 1; d < rank; d++) {
            srcPos[d] = outPos[d - 1];
        }

        final SummaryStatistics[] perElement = new SummaryStatistics[isize];
        for (int e = 0; e < isize; e++) {
            perElement[e] = new SummaryStatistics();
        }

        for (int a = 0; a < axisLength; a++) {
            srcPos[axis] = a;
            getDoubleArray(buffer, srcPos);

            boolean accept = true;
            for (int e = 0; e < isize && accept; e++) {
                final double v = buffer[e];
                if ((ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v))) {
                    accept = false;
                }
            }
            if (accept) {
                for (int e = 0; e < isize; e++) {
                    perElement[e].addValue(buffer[e]);
                }
            }
        }

        // every element saw the same number of accepted items, so element 0 gives the count
        count.setAbs(outIt.index, (int) perElement[0].getN());

        // the buffer is reused to hand each statistic over to its result dataset
        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getSum();
        }
        sum.set(buffer, outPos);

        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getMean();
        }
        mean.set(buffer, outPos);

        for (int e = 0; e < isize; e++) {
            buffer[e] = perElement[e].getVariance();
        }
        var.set(buffer, outPos);
    }

    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
}

From source file:org.eclipse.january.dataset.AbstractDataset.java

/**
 * Calculate summary statistics for a dataset
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param name/*from w w w.  ja v  a2  s  .  c o  m*/
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final String name) {
    final IndexIterator iter = getIterator();
    final SummaryStatistics stats = new SummaryStatistics();
    //sum of logs is slow and we dont use it, so blocking its calculation here
    stats.setSumLogImpl(new NullStorelessUnivariateStatistic());

    if (storedValues == null || !storedValues.containsKey(STORE_HASH)) {
        boolean hasNaNs = false;
        double hash = 0;

        while (iter.hasNext()) {
            final double val = getElementDoubleAbs(iter.index);
            if (Double.isNaN(val)) {
                hash = (hash * 19) % Integer.MAX_VALUE;
                if (ignoreNaNs)
                    continue;
                hasNaNs = true;
            } else if (Double.isInfinite(val)) {
                hash = (hash * 19) % Integer.MAX_VALUE;
                if (ignoreInfs)
                    continue;
            } else {
                hash = (hash * 19 + val) % Integer.MAX_VALUE;
            }
            stats.addValue(val);
        }

        int ihash = ((int) hash) * 19 + getDType() * 17 + getElementsPerItem();
        setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_SHAPELESS_HASH), ihash);
        storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX),
                hasNaNs ? Double.NaN : fromDoubleToNumber(stats.getMax()));
        storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN),
                hasNaNs ? Double.NaN : fromDoubleToNumber(stats.getMin()));
        storedValues.put(name, stats);
    } else {
        while (iter.hasNext()) {
            final double val = getElementDoubleAbs(iter.index);
            if (ignoreNaNs && Double.isNaN(val)) {
                continue;
            }
            if (ignoreInfs && Double.isInfinite(val)) {
                continue;
            }

            stats.addValue(val);
        }

        storedValues.put(name, stats);
    }
}

From source file:org.eclipse.january.dataset.AbstractDataset.java

/**
 * Calculate summary statistics for a dataset along an axis
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param axis/*from  w ww  .j  a v a  2 s.c  om*/
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    int rank = getRank();

    int[] oshape = getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;

    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    final int dtype = getDType();
    IntegerDataset count = new IntegerDataset(nshape);
    Dataset max = DatasetFactory.zeros(nshape, dtype);
    Dataset min = DatasetFactory.zeros(nshape, dtype);
    IntegerDataset maxIndex = new IntegerDataset(nshape);
    IntegerDataset minIndex = new IntegerDataset(nshape);
    Dataset sum = DatasetFactory.zeros(nshape, DTypeUtils.getLargestDType(dtype));
    DoubleDataset mean = new DoubleDataset(nshape);
    DoubleDataset var = new DoubleDataset(nshape);

    IndexIterator qiter = max.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();

    while (qiter.hasNext()) {
        int i = 0;
        for (; i < axis; i++) {
            spos[i] = qpos[i];
        }
        spos[i++] = 0;
        for (; i < rank; i++) {
            spos[i] = qpos[i - 1];
        }

        final SummaryStatistics stats = new SummaryStatistics();
        //sum of logs is slow and we dont use it, so blocking its calculation here
        stats.setSumLogImpl(new NullStorelessUnivariateStatistic());

        double amax = Double.NEGATIVE_INFINITY;
        double amin = Double.POSITIVE_INFINITY;
        boolean hasNaNs = false;
        if (ignoreNaNs) {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);

                if (Double.isNaN(val)) {
                    hasNaNs = true;
                    continue;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                }

                if (val > amax) {
                    amax = val;
                }
                if (val < amin) {
                    amin = val;
                }

                stats.addValue(val);
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);

                if (hasNaNs) {
                    if (!Double.isNaN(val))
                        stats.addValue(0);
                    continue;
                }

                if (Double.isNaN(val)) {
                    amax = Double.NaN;
                    amin = Double.NaN;
                    hasNaNs = true;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                } else {
                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }
                }
                stats.addValue(val);
            }
        }

        count.setAbs(qiter.index, (int) stats.getN());

        max.setObjectAbs(qiter.index, amax);
        min.setObjectAbs(qiter.index, amin);
        boolean fmax = false;
        boolean fmin = false;
        if (hasNaNs) {
            if (ignoreNaNs) {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val))
                        continue;

                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            } else {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val)) {
                        maxIndex.setAbs(qiter.index, j);
                        minIndex.setAbs(qiter.index, j);
                        break;
                    }
                }
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (!fmax && val == amax) {
                    maxIndex.setAbs(qiter.index, j);
                    fmax = true;
                    if (fmin)
                        break;
                }
                if (!fmin && val == amin) {
                    minIndex.setAbs(qiter.index, j);
                    fmin = true;
                    if (fmax)
                        break;
                }
            }
        }
        sum.setObjectAbs(qiter.index, stats.getSum());
        mean.setAbs(qiter.index, stats.getMean());
        var.setAbs(qiter.index, stats.getVariance());
    }
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + "-" + axis), max);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + "-" + axis), min);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + STORE_INDEX + "-" + axis), maxIndex);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + STORE_INDEX + "-" + axis), minIndex);
}

From source file:org.eclipse.january.metadata.internal.StatisticsMetadataImpl.java

/**
 * Calculate summary statistics for a dataset
 * <p>
 * One statistics object is built per element of an item. The same pass updates the
 * {@code hash} field and fills the maximum/minimum of {@code mm}; its position
 * fields are reset to null.
 * @param mm holder that receives the maximum and minimum
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @return the per-element statistics, one entry per element of an item
 */
@SuppressWarnings("unchecked")
private SummaryStatistics[] createSummaryStats(final MaxMin<T> mm, final boolean ignoreNaNs,
        final boolean ignoreInfs) {
    final IndexIterator it = dataset.getIterator();
    final SummaryStatistics[] perElement = new SummaryStatistics[isize];
    for (int e = 0; e < isize; e++) {
        final SummaryStatistics s = new SummaryStatistics();
        // sum of logs is slow and we don't use it, so blocking its calculation here
        s.setSumLogImpl(new NullStorelessUnivariateStatistic());
        perElement[e] = s;
    }

    if (isize != 1) {
        // compound items: an item is dropped as a whole if any element is rejected
        final double[] item = new double[isize];
        items:
        while (it.hasNext()) {
            for (int e = 0; e < isize; e++) {
                final double v = dataset.getElementDoubleAbs(it.index + e);
                if ((ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v))) {
                    continue items;
                }
                item[e] = v;
            }
            for (int e = 0; e < isize; e++) {
                final double v = item[e];
                perElement[e].addValue(v);
                hash = (int) (hash * 19 + Double.doubleToRawLongBits(v));
            }
        }

        final double[] maxima = new double[isize];
        final double[] minima = new double[isize];
        for (int e = 0; e < isize; e++) {
            maxima[e] = perElement[e].getMax();
            minima[e] = perElement[e].getMin();
        }
        mm.maximum = (T) maxima;
        mm.minimum = (T) minima;
    } else {
        final SummaryStatistics single = perElement[0];
        boolean sawNaN = false;

        if (!dataset.hasFloatingPointElements()) {
            // integer data: no NaN or infinity possible, read values as longs
            while (it.hasNext()) {
                final long v = dataset.getElementLongAbs(it.index);
                hash = (int) (hash * 19 + v);
                single.addValue(v);
            }
        } else if (ignoreNaNs || ignoreInfs) {
            // every value feeds the hash, even the ones filtered out of the statistics
            while (it.hasNext()) {
                final double v = dataset.getElementDoubleAbs(it.index);
                hash = (int) (hash * 19 + Double.doubleToRawLongBits(v));
                if (Double.isNaN(v)) {
                    if (ignoreNaNs) {
                        continue;
                    }
                    sawNaN = true;
                } else if (ignoreInfs && Double.isInfinite(v)) {
                    continue;
                }
                single.addValue(v);
            }
        } else {
            // nothing is filtered: only remember whether a NaN was seen
            while (it.hasNext()) {
                final double v = dataset.getElementDoubleAbs(it.index);
                hash = (int) (hash * 19 + Double.doubleToRawLongBits(v));
                if (Double.isNaN(v)) {
                    sawNaN = true;
                }
                single.addValue(v);
            }
        }

        // a NaN that was not ignored turns both extrema into NaN
        mm.maximum = (T) (sawNaN ? Double.NaN : DTypeUtils.fromDoubleToBiggestNumber(single.getMax(), dtype));
        mm.minimum = (T) (sawNaN ? Double.NaN : DTypeUtils.fromDoubleToBiggestNumber(single.getMin(), dtype));
    }

    hash = hash * 19 + dtype * 17 + isize;
    mm.maximumPositions = null;
    mm.minimumPositions = null;
    return perElement;
}

From source file:org.eclipse.january.metadata.internal.StatisticsMetadataImpl.java

/**
 * Calculate summary statistics for a dataset along an axis
 * @param axis axis of the dataset along which values are accumulated
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @return array holding, in this order: max, min, maxIndex, minIndex, count, mean, sum, var
 *         (max, min, maxIndex and minIndex are null when isize is not 1)
 */
@SuppressWarnings("deprecation")
private Dataset[] createAxisStats(final int axis, final boolean ignoreNaNs, final boolean ignoreInfs) {
    int rank = dataset.getRank();

    int[] oshape = dataset.getShape();
    int alen = oshape[axis]; // number of values visited along the chosen axis
    oshape[axis] = 1;

    // shape of the results: the dataset shape with the chosen axis removed
    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    Dataset max;
    Dataset min;
    IntegerDataset maxIndex;
    IntegerDataset minIndex;
    LongDataset count = DatasetFactory.zeros(LongDataset.class, nshape);
    Dataset sum;
    Dataset mean;
    Dataset var;

    if (isize == 1) {
        max = DatasetFactory.zeros(nshape, dtype);
        min = DatasetFactory.zeros(nshape, dtype);
        maxIndex = DatasetFactory.zeros(IntegerDataset.class, nshape);
        minIndex = DatasetFactory.zeros(IntegerDataset.class, nshape);
        sum = DatasetFactory.zeros(nshape, DTypeUtils.getLargestDType(dtype));
        mean = DatasetFactory.zeros(DoubleDataset.class, nshape);
        var = DatasetFactory.zeros(DoubleDataset.class, nshape);
    } else {
        // extrema and their positions are not produced when isize is not 1
        max = null;
        min = null;
        maxIndex = null;
        minIndex = null;
        sum = DatasetFactory.zeros(isize, nshape, DTypeUtils.getLargestDType(dtype));
        mean = DatasetFactory.zeros(isize, CompoundDoubleDataset.class, nshape);
        var = DatasetFactory.zeros(isize, CompoundDoubleDataset.class, nshape);
    }

    // walk over every position of the reduced-shape result
    IndexIterator qiter = count.getIterator(true);
    // NOTE(review): qpos is fetched once and re-read on every pass, so it is
    // presumably updated in place by the iterator - confirm
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone(); // position in the source dataset, rebuilt from qpos each pass

    if (isize == 1) {
        DoubleDataset lmean = (DoubleDataset) mean;
        DoubleDataset lvar = (DoubleDataset) var;

        final SummaryStatistics stats = new SummaryStatistics();
        while (qiter.hasNext()) {
            // expand the result position into a source position, leaving a slot at index 'axis'
            int i = 0;
            for (; i < axis; i++) {
                spos[i] = qpos[i];
            }
            spos[i++] = 0;
            for (; i < rank; i++) {
                spos[i] = qpos[i - 1];
            }

            stats.clear();
            // sum of logs is slow and we don't use it, so block its calculation here
            stats.setSumLogImpl(new NullStorelessUnivariateStatistic());

            double amax = Double.NEGATIVE_INFINITY;
            double amin = Double.POSITIVE_INFINITY;
            boolean hasNaNs = false;
            if (ignoreNaNs) {
                // NaNs are skipped (but remembered in hasNaNs); infinities are skipped too when requested
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);

                    if (Double.isNaN(val)) {
                        hasNaNs = true;
                        continue;
                    } else if (ignoreInfs && Double.isInfinite(val)) {
                        continue;
                    }

                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }

                    stats.addValue(val);
                }
            } else {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);

                    // once a NaN has been seen, later NaNs are dropped and every
                    // later non-NaN value is added as 0
                    // NOTE(review): presumably so that the count keeps advancing - confirm
                    if (hasNaNs) {
                        if (!Double.isNaN(val))
                            stats.addValue(0);
                        continue;
                    }

                    if (Double.isNaN(val)) {
                        // first NaN: extrema become NaN; the NaN itself is still added to stats below
                        amax = Double.NaN;
                        amin = Double.NaN;
                        hasNaNs = true;
                    } else if (ignoreInfs && Double.isInfinite(val)) {
                        continue;
                    } else {
                        if (val > amax) {
                            amax = val;
                        }
                        if (val < amin) {
                            amin = val;
                        }
                    }
                    stats.addValue(val);
                }
            }

            count.setAbs(qiter.index, stats.getN());

            max.set(amax, qpos);
            min.set(amin, qpos);
            // second pass along the axis: record the first position holding each extremum
            boolean fmax = false;
            boolean fmin = false;
            if (hasNaNs) {
                if (ignoreNaNs) {
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = dataset.getDouble(spos);
                        if (Double.isNaN(val))
                            continue;

                        if (!fmax && val == amax) { // FIXME qiter.index is wrong!!!
                            maxIndex.setAbs(qiter.index, j);
                            fmax = true;
                            if (fmin)
                                break;
                        }
                        if (!fmin && val == amin) {
                            minIndex.setAbs(qiter.index, j);
                            fmin = true;
                            if (fmax)
                                break;
                        }
                    }
                } else {
                    // NaNs were not ignored: both indices are set to the position of the first NaN
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = dataset.getDouble(spos);
                        if (Double.isNaN(val)) {
                            maxIndex.setAbs(qiter.index, j);
                            minIndex.setAbs(qiter.index, j);
                            break;
                        }
                    }
                }
            } else {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);
                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            }
            sum.setObjectAbs(qiter.index, stats.getSum());
            lmean.setAbs(qiter.index, stats.getMean());
            lvar.setAbs(qiter.index, stats.getVariance());
        }
    } else {
        CompoundDataset ldataset = (CompoundDataset) dataset;
        CompoundDoubleDataset lmean = (CompoundDoubleDataset) mean;
        CompoundDoubleDataset lvar = (CompoundDoubleDataset) var;
        double[] darray = new double[isize]; // scratch buffer, reused for reading items and writing results

        while (qiter.hasNext()) {
            // expand the result position into a source position, leaving a slot at index 'axis'
            int i = 0;
            for (; i < axis; i++) {
                spos[i] = qpos[i];
            }
            spos[i++] = 0;
            for (; i < rank; i++) {
                spos[i] = qpos[i - 1];
            }

            // one accumulator per element of the compound item
            final SummaryStatistics[] stats = new SummaryStatistics[isize];
            for (int k = 0; k < isize; k++) {
                stats[k] = new SummaryStatistics();
            }
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                ldataset.getDoubleArray(darray, spos);
                // the whole item is dropped if any one of its elements is an ignored NaN or infinity
                boolean skip = false;
                for (int k = 0; k < isize; k++) {
                    double v = darray[k];
                    if (ignoreNaNs && Double.isNaN(v)) {
                        skip = true;
                        break;
                    }
                    if (ignoreInfs && Double.isInfinite(v)) {
                        skip = true;
                        break;
                    }
                }
                if (!skip)
                    for (int k = 0; k < isize; k++) {
                        stats[k].addValue(darray[k]);
                    }
            }

            // every accumulator received the same number of values, so the first one gives the count
            count.setAbs(qiter.index, (int) stats[0].getN());

            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getSum();
            }
            sum.set(darray, qpos);
            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getMean();
            }
            lmean.setItem(darray, qpos);
            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getVariance();
            }
            lvar.setItem(darray, qpos);
        }
    }

    return new Dataset[] { max, min, maxIndex, minIndex, count, mean, sum, var };
}

From source file:org.hoidla.window.WindowUtils.java

/**
 * Identifies outliers in a window of values. A value counts as an outlier when it is
 * greater than {@code mean + outlierThresholdFactor * stdDev} or less than
 * {@code mean - outlierThresholdFactor * stdDev}, where mean and standard deviation
 * are taken over the whole of {@code data}. When {@code pattern} is supplied, each
 * outlying entry of {@code data} is overwritten in place with the pattern value at
 * the same index.
 * @param data values to examine; modified in place when pattern is not null
 * @param outlierThresholdFactor number of standard deviations from the mean beyond which a value is an outlier
 * @param pattern replacement values, same length as data, or null to leave data untouched
 * @return indexes of outlying data points
 * @throws IllegalArgumentException if pattern is not null and its length differs from that of data
 */
public static List<Integer> removeOutliers(double[] data, int outlierThresholdFactor, double[] pattern) {
    final boolean replace = pattern != null;
    if (replace && pattern.length != data.length) {
        throw new IllegalArgumentException("data and pattern need to be of same size");
    }

    // gather mean and standard deviation over the full window
    final SummaryStatistics summary = new SummaryStatistics();
    for (int idx = 0; idx < data.length; idx++) {
        summary.addValue(data[idx]);
    }
    final double mean = summary.getMean();
    final double stdDev = summary.getStandardDeviation();

    // acceptance band around the mean
    final double upper = mean + outlierThresholdFactor * stdDev;
    final double lower = mean - outlierThresholdFactor * stdDev;

    final List<Integer> flagged = new ArrayList<Integer>();
    for (int idx = 0; idx < data.length; idx++) {
        final double current = data[idx];
        if (!(current > upper || current < lower)) {
            continue;
        }
        if (replace) {
            // substitute the pattern value so that there is no net effect
            data[idx] = pattern[idx];
        }
        flagged.add(idx);
    }

    return flagged;
}