List of usage examples for the org.apache.commons.math3.stat.descriptive.SummaryStatistics constructor SummaryStatistics()
public SummaryStatistics()
From source file:org.eclipse.dataset.AbstractCompoundDataset.java
@Override protected void calculateSummaryStats(boolean ignoreNaNs, final boolean ignoreInfs, String name) { IndexIterator iter = getIterator();//from w w w . j ava 2 s .co m SummaryStatistics[] stats = new SummaryStatistics[isize]; for (int i = 0; i < isize; i++) stats[i] = new SummaryStatistics(); double[] vals = new double[isize]; while (iter.hasNext()) { boolean okay = true; for (int i = 0; i < isize; i++) { final double val = getElementDoubleAbs(iter.index + i); if (ignoreNaNs && Double.isNaN(val)) { okay = false; break; } if (ignoreInfs && Double.isInfinite(val)) { okay = false; break; } vals[i] = val; } if (!okay) continue; for (int i = 0; i < isize; i++) stats[i].addValue(vals[i]); } // now all the calculations are done, add the values into store if (storedValues == null) storedValues = new HashMap<String, Object>(); else storedValues.clear(); for (int i = 0; i < isize; i++) storedValues.put(name + i, stats[i]); }
From source file:org.eclipse.dataset.AbstractCompoundDataset.java
@Override protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) { int rank = getRank(); int[] oshape = getShape(); int alen = oshape[axis]; oshape[axis] = 1;//from ww w. j av a 2 s.c om int[] nshape = squeezeShape(oshape, false); IntegerDataset count = new IntegerDataset(nshape); CompoundDoubleDataset sum = new CompoundDoubleDataset(isize, nshape); CompoundDoubleDataset mean = new CompoundDoubleDataset(isize, nshape); CompoundDoubleDataset var = new CompoundDoubleDataset(isize, nshape); IndexIterator qiter = count.getIterator(true); int[] qpos = qiter.getPos(); int[] spos = oshape; double[] darray = new double[isize]; while (qiter.hasNext()) { int i = 0; for (; i < axis; i++) { spos[i] = qpos[i]; } spos[i++] = 0; for (; i < rank; i++) { spos[i] = qpos[i - 1]; } final SummaryStatistics[] stats = new SummaryStatistics[isize]; for (int k = 0; k < isize; k++) { stats[k] = new SummaryStatistics(); } for (int j = 0; j < alen; j++) { spos[axis] = j; getDoubleArray(darray, spos); boolean skip = false; for (int k = 0; k < isize; k++) { double v = darray[k]; if (ignoreNaNs && Double.isNaN(v)) { skip = true; break; } if (ignoreInfs && Double.isInfinite(v)) { skip = true; break; } } if (!skip) for (int k = 0; k < isize; k++) { stats[k].addValue(darray[k]); } } count.setAbs(qiter.index, (int) stats[0].getN()); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getSum(); } sum.set(darray, qpos); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getMean(); } mean.set(darray, qpos); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getVariance(); } var.set(darray, qpos); } setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var); }
From source file:org.eclipse.dataset.AbstractDataset.java
/**
 * Computes summary statistics over every element of the dataset and caches them.
 * <p>
 * When no {@code STORE_HASH} entry is cached yet, the same pass also derives a shapeless hash and
 * the maximum, minimum, positive maximum and positive minimum, each stored under a key built by
 * {@code storeName} from the two flags. In either case the populated {@link SummaryStatistics} is
 * stored under {@code name}.
 *
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param name key under which the statistics object is stored
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final String name) {
    final IndexIterator it = getIterator();
    final SummaryStatistics summary = new SummaryStatistics();

    final boolean hashCached = storedValues != null && storedValues.containsKey(STORE_HASH);
    if (hashCached) {
        // hash already known: a plain filtered accumulation is all that is needed
        while (it.hasNext()) {
            final double v = getElementDoubleAbs(it.index);
            final boolean rejected = (ignoreNaNs && Double.isNaN(v)) || (ignoreInfs && Double.isInfinite(v));
            if (!rejected) {
                summary.addValue(v);
            }
        }
        storedValues.put(name, summary);
        return;
    }

    boolean sawNaN = false;
    double runningHash = 0;
    double posMax = Double.MIN_VALUE;
    double posMin = Double.POSITIVE_INFINITY;
    while (it.hasNext()) {
        final double v = getElementDoubleAbs(it.index);
        final boolean nan = Double.isNaN(v);
        final boolean inf = Double.isInfinite(v);

        // non-finite values advance the hash without contributing their value
        runningHash = (nan || inf)
                ? (runningHash * 19) % Integer.MAX_VALUE
                : (runningHash * 19 + v) % Integer.MAX_VALUE;

        if ((nan && ignoreNaNs) || (inf && ignoreInfs)) {
            continue;
        }
        if (nan) {
            sawNaN = true;
        }

        if (v > 0) {
            posMin = Math.min(posMin, v);
            posMax = Math.max(posMax, v);
        }
        summary.addValue(v);
    }

    final int shapelessHash = ((int) runningHash) * 19 + getDtype() * 17 + getElementsPerItem();
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_SHAPELESS_HASH), shapelessHash);
    // an accepted NaN turns every extreme into NaN
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX),
            sawNaN ? Double.NaN : fromDoubleToNumber(summary.getMax()));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN),
            sawNaN ? Double.NaN : fromDoubleToNumber(summary.getMin()));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_POS_MAX),
            sawNaN ? Double.NaN : fromDoubleToNumber(posMax));
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_POS_MIN),
            sawNaN ? Double.NaN : fromDoubleToNumber(posMin));
    storedValues.put(name, summary);
}
From source file:org.eclipse.dataset.AbstractDataset.java
/**
 * Calculate summary statistics for a dataset along an axis
 * <p>
 * For every position of the dataset with {@code axis} removed, this gathers the count, maximum,
 * minimum, index of the first maximum and of the first minimum along the axis, sum, mean and
 * variance. Each result dataset is stored under a key built by {@code storeName} from the two
 * flags, a {@code STORE_*} constant and the axis number.
 *
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param axis axis along which the statistics are accumulated
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    int rank = getRank();

    int[] oshape = getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;

    // result shape: the source shape with the reduced axis dropped
    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    final int dtype = getDtype();
    IntegerDataset count = new IntegerDataset(nshape);
    Dataset max = DatasetFactory.zeros(nshape, dtype);
    Dataset min = DatasetFactory.zeros(nshape, dtype);
    IntegerDataset maxIndex = new IntegerDataset(nshape);
    IntegerDataset minIndex = new IntegerDataset(nshape);
    Dataset sum = DatasetFactory.zeros(nshape, getLargestDType(dtype));
    DoubleDataset mean = new DoubleDataset(nshape);
    DoubleDataset var = new DoubleDataset(nshape);

    IndexIterator qiter = max.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();

    while (qiter.hasNext()) {
        // map the result position onto a source position, leaving a slot for the axis
        int i = 0;
        for (; i < axis; i++) {
            spos[i] = qpos[i];
        }
        spos[i++] = 0;
        for (; i < rank; i++) {
            spos[i] = qpos[i - 1];
        }

        final SummaryStatistics stats = new SummaryStatistics();
        double amax = Double.NEGATIVE_INFINITY;
        double amin = Double.POSITIVE_INFINITY;
        boolean hasNaNs = false;
        if (ignoreNaNs) {
            // first pass, NaNs skipped: they are only remembered via hasNaNs
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (Double.isNaN(val)) {
                    hasNaNs = true;
                    continue;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                }
                if (val > amax) {
                    amax = val;
                }
                if (val < amin) {
                    amin = val;
                }
                stats.addValue(val);
            }
        } else {
            // first pass, NaNs kept: the first NaN forces max and min to NaN
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (hasNaNs) {
                    // after the first NaN, later non-NaN values are entered as 0 and later NaNs are skipped
                    if (!Double.isNaN(val))
                        stats.addValue(0);
                    continue;
                }
                if (Double.isNaN(val)) {
                    amax = Double.NaN;
                    amin = Double.NaN;
                    hasNaNs = true;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                } else {
                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }
                }
                stats.addValue(val);
            }
        }

        count.setAbs(qiter.index, (int) stats.getN());

        max.setObjectAbs(qiter.index, amax);
        min.setObjectAbs(qiter.index, amin);

        // second pass: locate the first index along the axis that holds the max and the min
        boolean fmax = false;
        boolean fmin = false;
        if (hasNaNs) {
            if (ignoreNaNs) {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val))
                        continue;

                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            } else {
                // max and min are both NaN here, so both indices point at the first NaN
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val)) {
                        maxIndex.setAbs(qiter.index, j);
                        minIndex.setAbs(qiter.index, j);
                        break;
                    }
                }
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (!fmax && val == amax) {
                    maxIndex.setAbs(qiter.index, j);
                    fmax = true;
                    if (fmin)
                        break;
                }
                if (!fmin && val == amin) {
                    minIndex.setAbs(qiter.index, j);
                    fmin = true;
                    if (fmax)
                        break;
                }
            }
        }
        sum.setObjectAbs(qiter.index, stats.getSum());
        mean.setAbs(qiter.index, stats.getMean());
        var.setAbs(qiter.index, stats.getVariance());
    }
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + "-" + axis), max);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + "-" + axis), min);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + STORE_INDEX + "-" + axis), maxIndex);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + STORE_INDEX + "-" + axis), minIndex);
}
From source file:org.eclipse.january.dataset.AbstractCompoundDataset.java
@Override protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) { int rank = getRank(); int[] oshape = getShape(); int alen = oshape[axis]; oshape[axis] = 1;//from ww w. j a v a 2s . com int[] nshape = ShapeUtils.squeezeShape(oshape, false); IntegerDataset count = new IntegerDataset(nshape); CompoundDoubleDataset sum = new CompoundDoubleDataset(isize, nshape); CompoundDoubleDataset mean = new CompoundDoubleDataset(isize, nshape); CompoundDoubleDataset var = new CompoundDoubleDataset(isize, nshape); IndexIterator qiter = count.getIterator(true); int[] qpos = qiter.getPos(); int[] spos = oshape; double[] darray = new double[isize]; while (qiter.hasNext()) { int i = 0; for (; i < axis; i++) { spos[i] = qpos[i]; } spos[i++] = 0; for (; i < rank; i++) { spos[i] = qpos[i - 1]; } final SummaryStatistics[] stats = new SummaryStatistics[isize]; for (int k = 0; k < isize; k++) { stats[k] = new SummaryStatistics(); } for (int j = 0; j < alen; j++) { spos[axis] = j; getDoubleArray(darray, spos); boolean skip = false; for (int k = 0; k < isize; k++) { double v = darray[k]; if (ignoreNaNs && Double.isNaN(v)) { skip = true; break; } if (ignoreInfs && Double.isInfinite(v)) { skip = true; break; } } if (!skip) for (int k = 0; k < isize; k++) { stats[k].addValue(darray[k]); } } count.setAbs(qiter.index, (int) stats[0].getN()); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getSum(); } sum.set(darray, qpos); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getMean(); } mean.set(darray, qpos); for (int k = 0; k < isize; k++) { darray[k] = stats[k].getVariance(); } var.set(darray, qpos); } setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var); }
From source file:org.eclipse.january.dataset.AbstractDataset.java
/** * Calculate summary statistics for a dataset * @param ignoreNaNs if true, ignore NaNs * @param ignoreInfs if true, ignore infinities * @param name/*from w w w. ja v a2 s . c o m*/ */ protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final String name) { final IndexIterator iter = getIterator(); final SummaryStatistics stats = new SummaryStatistics(); //sum of logs is slow and we dont use it, so blocking its calculation here stats.setSumLogImpl(new NullStorelessUnivariateStatistic()); if (storedValues == null || !storedValues.containsKey(STORE_HASH)) { boolean hasNaNs = false; double hash = 0; while (iter.hasNext()) { final double val = getElementDoubleAbs(iter.index); if (Double.isNaN(val)) { hash = (hash * 19) % Integer.MAX_VALUE; if (ignoreNaNs) continue; hasNaNs = true; } else if (Double.isInfinite(val)) { hash = (hash * 19) % Integer.MAX_VALUE; if (ignoreInfs) continue; } else { hash = (hash * 19 + val) % Integer.MAX_VALUE; } stats.addValue(val); } int ihash = ((int) hash) * 19 + getDType() * 17 + getElementsPerItem(); setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_SHAPELESS_HASH), ihash); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX), hasNaNs ? Double.NaN : fromDoubleToNumber(stats.getMax())); storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN), hasNaNs ? Double.NaN : fromDoubleToNumber(stats.getMin())); storedValues.put(name, stats); } else { while (iter.hasNext()) { final double val = getElementDoubleAbs(iter.index); if (ignoreNaNs && Double.isNaN(val)) { continue; } if (ignoreInfs && Double.isInfinite(val)) { continue; } stats.addValue(val); } storedValues.put(name, stats); } }
From source file:org.eclipse.january.dataset.AbstractDataset.java
/**
 * Calculate summary statistics for a dataset along an axis
 * <p>
 * For every position of the dataset with {@code axis} removed, this gathers the count, maximum,
 * minimum, index of the first maximum and of the first minimum along the axis, sum, mean and
 * variance. Each result dataset is stored under a key built by {@code storeName} from the two
 * flags, a {@code STORE_*} constant and the axis number.
 *
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @param axis axis along which the statistics are accumulated
 */
protected void calculateSummaryStats(final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
    int rank = getRank();

    int[] oshape = getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;

    // result shape: the source shape with the reduced axis dropped
    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    final int dtype = getDType();
    IntegerDataset count = new IntegerDataset(nshape);
    Dataset max = DatasetFactory.zeros(nshape, dtype);
    Dataset min = DatasetFactory.zeros(nshape, dtype);
    IntegerDataset maxIndex = new IntegerDataset(nshape);
    IntegerDataset minIndex = new IntegerDataset(nshape);
    Dataset sum = DatasetFactory.zeros(nshape, DTypeUtils.getLargestDType(dtype));
    DoubleDataset mean = new DoubleDataset(nshape);
    DoubleDataset var = new DoubleDataset(nshape);

    IndexIterator qiter = max.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();

    while (qiter.hasNext()) {
        // map the result position onto a source position, leaving a slot for the axis
        int i = 0;
        for (; i < axis; i++) {
            spos[i] = qpos[i];
        }
        spos[i++] = 0;
        for (; i < rank; i++) {
            spos[i] = qpos[i - 1];
        }

        final SummaryStatistics stats = new SummaryStatistics();
        // the sum of logs is slow and unused here, so its calculation is switched off
        stats.setSumLogImpl(new NullStorelessUnivariateStatistic());

        double amax = Double.NEGATIVE_INFINITY;
        double amin = Double.POSITIVE_INFINITY;
        boolean hasNaNs = false;
        if (ignoreNaNs) {
            // first pass, NaNs skipped: they are only remembered via hasNaNs
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (Double.isNaN(val)) {
                    hasNaNs = true;
                    continue;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                }
                if (val > amax) {
                    amax = val;
                }
                if (val < amin) {
                    amin = val;
                }
                stats.addValue(val);
            }
        } else {
            // first pass, NaNs kept: the first NaN forces max and min to NaN
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (hasNaNs) {
                    // after the first NaN, later non-NaN values are entered as 0 and later NaNs are skipped
                    if (!Double.isNaN(val))
                        stats.addValue(0);
                    continue;
                }
                if (Double.isNaN(val)) {
                    amax = Double.NaN;
                    amin = Double.NaN;
                    hasNaNs = true;
                } else if (ignoreInfs && Double.isInfinite(val)) {
                    continue;
                } else {
                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }
                }
                stats.addValue(val);
            }
        }

        count.setAbs(qiter.index, (int) stats.getN());

        max.setObjectAbs(qiter.index, amax);
        min.setObjectAbs(qiter.index, amin);

        // second pass: locate the first index along the axis that holds the max and the min
        boolean fmax = false;
        boolean fmin = false;
        if (hasNaNs) {
            if (ignoreNaNs) {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val))
                        continue;

                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            } else {
                // max and min are both NaN here, so both indices point at the first NaN
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = getDouble(spos);
                    if (Double.isNaN(val)) {
                        maxIndex.setAbs(qiter.index, j);
                        minIndex.setAbs(qiter.index, j);
                        break;
                    }
                }
            }
        } else {
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                final double val = getDouble(spos);
                if (!fmax && val == amax) {
                    maxIndex.setAbs(qiter.index, j);
                    fmax = true;
                    if (fmin)
                        break;
                }
                if (!fmin && val == amin) {
                    minIndex.setAbs(qiter.index, j);
                    fmin = true;
                    if (fmax)
                        break;
                }
            }
        }
        sum.setObjectAbs(qiter.index, stats.getSum());
        mean.setAbs(qiter.index, stats.getMean());
        var.setAbs(qiter.index, stats.getVariance());
    }
    setStoredValue(storeName(ignoreNaNs, ignoreInfs, STORE_COUNT + "-" + axis), count);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + "-" + axis), max);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + "-" + axis), min);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_SUM + "-" + axis), sum);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MEAN + "-" + axis), mean);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_VAR + "-" + axis), var);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MAX + STORE_INDEX + "-" + axis), maxIndex);
    storedValues.put(storeName(ignoreNaNs, ignoreInfs, STORE_MIN + STORE_INDEX + "-" + axis), minIndex);
}
From source file:org.eclipse.january.metadata.internal.StatisticsMetadataImpl.java
/**
 * Calculate summary statistics for a dataset
 * <p>
 * Builds one {@link SummaryStatistics} per element of an item, folds every visited value into
 * the {@code hash} field, and records the overall maximum and minimum in {@code mm}. For
 * single-element items the extremes are scalars (NaN if an accepted NaN was seen); otherwise they
 * are {@code double[]} arrays with one entry per element.
 *
 * @param mm holder whose maximum and minimum are set; its position fields are reset to null
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @return the per-element statistics, an array of length {@code isize}
 */
@SuppressWarnings("unchecked")
private SummaryStatistics[] createSummaryStats(final MaxMin<T> mm, final boolean ignoreNaNs, final boolean ignoreInfs) {
    final IndexIterator iter = dataset.getIterator();
    SummaryStatistics[] istats = new SummaryStatistics[isize];
    for (int i = 0; i < isize; i++) {
        istats[i] = new SummaryStatistics();
        // sum of logs is slow and we don't use it, so blocking its calculation here
        istats[i].setSumLogImpl(new NullStorelessUnivariateStatistic());
    }

    SummaryStatistics stats;
    if (isize == 1) {
        boolean hasNaNs = false;
        stats = istats[0];
        if (dataset.hasFloatingPointElements() && (ignoreNaNs || ignoreInfs)) {
            // floating point with filtering: every value is hashed, even those that are then skipped
            while (iter.hasNext()) {
                final double val = dataset.getElementDoubleAbs(iter.index);
                hash = (int) (hash * 19 + Double.doubleToRawLongBits(val));
                if (Double.isNaN(val)) {
                    if (ignoreNaNs)
                        continue;
                    hasNaNs = true;
                } else if (Double.isInfinite(val)) {
                    if (ignoreInfs)
                        continue;
                }
                stats.addValue(val);
            }
        } else if (dataset.hasFloatingPointElements()) {
            // floating point without filtering: only note whether a NaN occurred
            while (iter.hasNext()) {
                final double val = dataset.getElementDoubleAbs(iter.index);
                hash = (int) (hash * 19 + Double.doubleToRawLongBits(val));
                if (Double.isNaN(val)) {
                    hasNaNs = true;
                }
                stats.addValue(val);
            }
        } else {
            // non-floating-point elements are read and hashed as longs
            while (iter.hasNext()) {
                final long val = dataset.getElementLongAbs(iter.index);
                hash = (int) (hash * 19 + val);
                stats.addValue(val);
            }
        }

        mm.maximum = (T) (hasNaNs ? Double.NaN : DTypeUtils.fromDoubleToBiggestNumber(stats.getMax(), dtype));
        mm.minimum = (T) (hasNaNs ? Double.NaN : DTypeUtils.fromDoubleToBiggestNumber(stats.getMin(), dtype));
    } else {
        double[] vals = new double[isize];
        while (iter.hasNext()) {
            // an item is all-or-nothing: one rejected element excludes every element of it
            boolean okay = true;
            for (int j = 0; j < isize; j++) {
                final double val = dataset.getElementDoubleAbs(iter.index + j);
                if (ignoreNaNs && Double.isNaN(val)) {
                    okay = false;
                    break;
                }
                if (ignoreInfs && Double.isInfinite(val)) {
                    okay = false;
                    break;
                }
                vals[j] = val;
            }
            if (okay) {
                // unlike the single-element path, only accepted items reach the hash
                for (int j = 0; j < isize; j++) {
                    double val = vals[j];
                    istats[j].addValue(val);
                    hash = (int) (hash * 19 + Double.doubleToRawLongBits(val));
                }
            }
        }

        double[] lmax = new double[isize];
        double[] lmin = new double[isize];
        for (int j = 0; j < isize; j++) {
            stats = istats[j];
            lmax[j] = stats.getMax();
            lmin[j] = stats.getMin();
        }
        mm.maximum = (T) lmax;
        mm.minimum = (T) lmin;
    }

    // finish the hash with the element type and item size
    hash = hash * 19 + dtype * 17 + isize;
    mm.maximumPositions = null;
    mm.minimumPositions = null;
    return istats;
}
From source file:org.eclipse.january.metadata.internal.StatisticsMetadataImpl.java
/**
 * Calculate summary statistics for a dataset along an axis
 * <p>
 * For every position of the dataset with {@code axis} removed, this gathers the count, sum, mean
 * and variance along the axis. When items have a single element ({@code isize == 1}) it also
 * gathers the maximum, the minimum and the index of the first maximum and of the first minimum;
 * for compound items those four results are {@code null}.
 *
 * @param axis axis along which the statistics are accumulated
 * @param ignoreNaNs if true, ignore NaNs
 * @param ignoreInfs if true, ignore infinities
 * @return the result datasets in the order
 *         {@code { max, min, maxIndex, minIndex, count, mean, sum, var }}
 */
@SuppressWarnings("deprecation")
private Dataset[] createAxisStats(final int axis, final boolean ignoreNaNs, final boolean ignoreInfs) {
    int rank = dataset.getRank();

    int[] oshape = dataset.getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;

    // result shape: the source shape with the reduced axis dropped
    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }

    Dataset max;
    Dataset min;
    IntegerDataset maxIndex;
    IntegerDataset minIndex;
    LongDataset count = DatasetFactory.zeros(LongDataset.class, nshape);
    Dataset sum;
    Dataset mean;
    Dataset var;

    if (isize == 1) {
        max = DatasetFactory.zeros(nshape, dtype);
        min = DatasetFactory.zeros(nshape, dtype);
        maxIndex = DatasetFactory.zeros(IntegerDataset.class, nshape);
        minIndex = DatasetFactory.zeros(IntegerDataset.class, nshape);
        sum = DatasetFactory.zeros(nshape, DTypeUtils.getLargestDType(dtype));
        mean = DatasetFactory.zeros(DoubleDataset.class, nshape);
        var = DatasetFactory.zeros(DoubleDataset.class, nshape);
    } else {
        // extremes and their indices are not produced for compound items
        max = null;
        min = null;
        maxIndex = null;
        minIndex = null;
        sum = DatasetFactory.zeros(isize, nshape, DTypeUtils.getLargestDType(dtype));
        mean = DatasetFactory.zeros(isize, CompoundDoubleDataset.class, nshape);
        var = DatasetFactory.zeros(isize, CompoundDoubleDataset.class, nshape);
    }

    IndexIterator qiter = count.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();

    if (isize == 1) {
        DoubleDataset lmean = (DoubleDataset) mean;
        DoubleDataset lvar = (DoubleDataset) var;

        // a single statistics object is cleared and reused for every result position
        final SummaryStatistics stats = new SummaryStatistics();
        while (qiter.hasNext()) {
            // map the result position onto a source position, leaving a slot for the axis
            int i = 0;
            for (; i < axis; i++) {
                spos[i] = qpos[i];
            }
            spos[i++] = 0;
            for (; i < rank; i++) {
                spos[i] = qpos[i - 1];
            }

            stats.clear();
            //sum of logs is slow and we dont use it, so blocking its calculation here
            stats.setSumLogImpl(new NullStorelessUnivariateStatistic());

            double amax = Double.NEGATIVE_INFINITY;
            double amin = Double.POSITIVE_INFINITY;
            boolean hasNaNs = false;
            if (ignoreNaNs) {
                // first pass, NaNs skipped: they are only remembered via hasNaNs
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);
                    if (Double.isNaN(val)) {
                        hasNaNs = true;
                        continue;
                    } else if (ignoreInfs && Double.isInfinite(val)) {
                        continue;
                    }
                    if (val > amax) {
                        amax = val;
                    }
                    if (val < amin) {
                        amin = val;
                    }
                    stats.addValue(val);
                }
            } else {
                // first pass, NaNs kept: the first NaN forces max and min to NaN
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);
                    if (hasNaNs) {
                        // after the first NaN, later non-NaN values are entered as 0 and later NaNs are skipped
                        if (!Double.isNaN(val))
                            stats.addValue(0);
                        continue;
                    }
                    if (Double.isNaN(val)) {
                        amax = Double.NaN;
                        amin = Double.NaN;
                        hasNaNs = true;
                    } else if (ignoreInfs && Double.isInfinite(val)) {
                        continue;
                    } else {
                        if (val > amax) {
                            amax = val;
                        }
                        if (val < amin) {
                            amin = val;
                        }
                    }
                    stats.addValue(val);
                }
            }

            count.setAbs(qiter.index, stats.getN());

            max.set(amax, qpos);
            min.set(amin, qpos);

            // second pass: locate the first index along the axis that holds the max and the min
            boolean fmax = false;
            boolean fmin = false;
            if (hasNaNs) {
                if (ignoreNaNs) {
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = dataset.getDouble(spos);
                        if (Double.isNaN(val))
                            continue;

                        if (!fmax && val == amax) { // FIXME qiter.index is wrong!!!
                            maxIndex.setAbs(qiter.index, j);
                            fmax = true;
                            if (fmin)
                                break;
                        }
                        if (!fmin && val == amin) {
                            minIndex.setAbs(qiter.index, j);
                            fmin = true;
                            if (fmax)
                                break;
                        }
                    }
                } else {
                    // max and min are both NaN here, so both indices point at the first NaN
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = dataset.getDouble(spos);
                        if (Double.isNaN(val)) {
                            maxIndex.setAbs(qiter.index, j);
                            minIndex.setAbs(qiter.index, j);
                            break;
                        }
                    }
                }
            } else {
                for (int j = 0; j < alen; j++) {
                    spos[axis] = j;
                    final double val = dataset.getDouble(spos);
                    if (!fmax && val == amax) {
                        maxIndex.setAbs(qiter.index, j);
                        fmax = true;
                        if (fmin)
                            break;
                    }
                    if (!fmin && val == amin) {
                        minIndex.setAbs(qiter.index, j);
                        fmin = true;
                        if (fmax)
                            break;
                    }
                }
            }
            sum.setObjectAbs(qiter.index, stats.getSum());
            lmean.setAbs(qiter.index, stats.getMean());
            lvar.setAbs(qiter.index, stats.getVariance());
        }
    } else {
        CompoundDataset ldataset = (CompoundDataset) dataset;
        CompoundDoubleDataset lmean = (CompoundDoubleDataset) mean;
        CompoundDoubleDataset lvar = (CompoundDoubleDataset) var;
        double[] darray = new double[isize];

        while (qiter.hasNext()) {
            // map the result position onto a source position, leaving a slot for the axis
            int i = 0;
            for (; i < axis; i++) {
                spos[i] = qpos[i];
            }
            spos[i++] = 0;
            for (; i < rank; i++) {
                spos[i] = qpos[i - 1];
            }

            // NOTE(review): unlike the single-element path, these statistics keep the default sum-of-logs implementation
            final SummaryStatistics[] stats = new SummaryStatistics[isize];
            for (int k = 0; k < isize; k++) {
                stats[k] = new SummaryStatistics();
            }
            for (int j = 0; j < alen; j++) {
                spos[axis] = j;
                ldataset.getDoubleArray(darray, spos);
                // an item is all-or-nothing: one rejected element excludes every element of it
                boolean skip = false;
                for (int k = 0; k < isize; k++) {
                    double v = darray[k];
                    if (ignoreNaNs && Double.isNaN(v)) {
                        skip = true;
                        break;
                    }
                    if (ignoreInfs && Double.isInfinite(v)) {
                        skip = true;
                        break;
                    }
                }
                if (!skip)
                    for (int k = 0; k < isize; k++) {
                        stats[k].addValue(darray[k]);
                    }
            }

            count.setAbs(qiter.index, (int) stats[0].getN());

            // darray is reused as the output buffer for each statistic in turn
            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getSum();
            }
            sum.set(darray, qpos);
            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getMean();
            }
            lmean.setItem(darray, qpos);
            for (int k = 0; k < isize; k++) {
                darray[k] = stats[k].getVariance();
            }
            lvar.setItem(darray, qpos);
        }
    }

    return new Dataset[] { max, min, maxIndex, minIndex, count, mean, sum, var };
}
From source file:org.hoidla.window.WindowUtils.java
/** * Identifies outliers// w w w. jav a 2 s . c om * @param data * @param outlierThresholdFactor * @param pattern * @return indexes outlying data points */ public static List<Integer> removeOutliers(double[] data, int outlierThresholdFactor, double[] pattern) { if (null != pattern && data.length != pattern.length) { throw new IllegalArgumentException("data and pattern need to be of same size"); } //stats SummaryStatistics stats = new SummaryStatistics(); for (double value : data) { stats.addValue(value); } double mean = stats.getMean(); double stdDev = stats.getStandardDeviation(); //thresholds double upThreshold = mean + outlierThresholdFactor * stdDev; double loThreshold = mean - outlierThresholdFactor * stdDev; //detect outliers List<Integer> outliers = new ArrayList<Integer>(); int i = 0; for (double value : data) { if (value > upThreshold || value < loThreshold) { //replace with pattern value so that there is no net effect if (null != pattern) { data[i] = pattern[i]; } outliers.add(i); } ++i; } return outliers; }