Example usage for org.apache.commons.math.stat.descriptive.rank.Percentile - Percentile()

Introduction

On this page you can find example usage of the org.apache.commons.math.stat.descriptive.rank.Percentile default constructor, Percentile().

Prototype

public Percentile() 

Document

Constructs a Percentile with a default quantile value of 50.0.
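
As a quick orientation, here is a minimal, self-contained sketch of the constructor in use (assuming Apache Commons Math 2.x on the classpath; the class name PercentileDefaultDemo and the sample data are invented for illustration):

import org.apache.commons.math.stat.descriptive.rank.Percentile;

public class PercentileDefaultDemo {
    public static void main(final String[] args) {
        final double[] data = { 12.0, 5.0, 7.0, 3.0, 9.0 };

        // The no-argument constructor defaults to the 50th percentile (median).
        final Percentile percentile = new Percentile();
        System.out.println("median = " + percentile.evaluate(data));

        // The quantile can be changed on the instance ...
        percentile.setQuantile(90.0);
        System.out.println("p90 = " + percentile.evaluate(data));

        // ... or supplied per call without touching the configured quantile.
        System.out.println("p25 = " + percentile.evaluate(data, 25.0));
    }
}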

Usage

From source file: edu.cornell.med.icb.learning.PercentileScalingRowProcessor.java

private void observeStatistics(final MutableString featureId, final int featureIndex,
        final double[] trimmedArray) {
    final Percentile lowerPercentile = new Percentile();
    lowerPercentile.setQuantile(20);
    final double min = lowerPercentile.evaluate(trimmedArray);
    final Percentile higherPercentile = new Percentile();
    higherPercentile.setQuantile(80);
    final double max = higherPercentile.evaluate(trimmedArray);
    final Percentile medianPercentile = new Percentile();
    medianPercentile.setQuantile(50);
    final double median = medianPercentile.evaluate(trimmedArray);
    final double range = max - min;

    featureIndex2ScaleMedian[featureIndex] = median;
    featureIndex2ScaleRange[featureIndex] = range;
    if (featureId != null) {

        probesetScaleMedianMap.put(featureId, median);
        probesetScaleRangeMap.put(featureId, range);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace(String.format("training, featureIndex/columnId %d/%s lower: %f higher %f median %f ",
                featureIndex, featureId, min, max, median));
    }
}
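
Since evaluate also accepts the percentile as a second argument, the three statistics above could be gathered with a single Percentile instance instead of three; a minimal sketch of that variant, reusing the trimmedArray parameter from the method above:

// Condensed variant (sketch): one Percentile, the percentile supplied per call.
final Percentile percentile = new Percentile();
final double min = percentile.evaluate(trimmedArray, 20.0);
final double max = percentile.evaluate(trimmedArray, 80.0);
final double median = percentile.evaluate(trimmedArray, 50.0);
final double range = max - min;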

From source file: edu.cornell.med.icb.goby.modes.CompactFileStatsMode.java

/**
 * Print statistics about a reads file in the Goby compact form.
 *
 * @param file The file to display statistics about
 * @throws IOException if the file cannot be read
 */
private void describeCompactReads(final File file) throws IOException {
    stream.printf("Compact reads filename = %s%n", file);

    // keep the read lengths for computing quantiles
    final DoubleArrayList readLengths = new DoubleArrayList();

    int minLength = Integer.MAX_VALUE;
    int maxLength = Integer.MIN_VALUE;

    int numberOfIdentifiers = 0;
    int numberOfDescriptions = 0;
    int numberOfSequences = 0;
    int numberOfSequencePairs = 0;
    int numberOfQualityScores = 0;
    int numberOfQualityScorePairs = 0;

    long totalReadLength = 0;
    long totalReadLengthPair = 0;
    final DistinctIntValueCounterBitSet allQueryIndices = new DistinctIntValueCounterBitSet();

    ReadsReader reader = null;
    boolean checkedForPaired = false;

    try {
        final long size = file.length();
        reader = new ReadsReader(FileUtils.openInputStream(file));
        for (final Reads.ReadEntry entry : reader) {
            final int readLength = entry.getReadLength();

            for (int i = 0; i < entry.getMetaDataCount(); i++) {
                Reads.MetaData metaData = entry.getMetaData(i);
                stream.printf("meta-data key=%s value=%s%n", metaData.getKey(), metaData.getValue());

            }

            // across this file
            allQueryIndices.observe(entry.getReadIndex());
            totalReadLength += readLength;
            totalReadLengthPair += entry.getReadLengthPair();

            // across all files
            numberOfReads++;
            numberOfDescriptions += entry.hasDescription() ? 1 : 0;
            cumulativeReadLength += readLength;

            if (verbose && entry.hasDescription()) {
                stream.println("Description found: " + entry.getDescription());
            }
            numberOfIdentifiers += entry.hasReadIdentifier() ? 1 : 0;
            if (verbose && entry.hasReadIdentifier()) {
                stream.printf("Identifier found: %s    /  size=%,d%n", entry.getReadIdentifier(), readLength);
            }
            numberOfSequences += entry.hasSequence() && !entry.getSequence().isEmpty() ? 1 : 0;
            final boolean samplePaired = entry.hasSequencePair() && !entry.getSequencePair().isEmpty();
            if (samplePaired) {
                numberOfSequencePairs += 1;
            }
            if (!checkedForPaired) {
                // Check only the very first entry.
                checkedForPaired = true;
                pairedSamples.add(samplePaired);
            }
            if (entry.hasQualityScores() && !entry.getQualityScores().isEmpty()) {
                numberOfQualityScores += 1;
                final int qualityLength = entry.getQualityScores().size();
                minQualityLength = Math.min(minQualityLength, qualityLength);
                maxQualityLength = Math.max(maxQualityLength, qualityLength);
            }

            numberOfQualityScorePairs += entry.hasQualityScoresPair() && !entry.getQualityScoresPair().isEmpty()
                    ? 1
                    : 0;

            // we only need to keep all the read lengths if quantiles are being computed
            if (computeQuantiles) {
                readLengths.add(readLength);
            }
            minLength = Math.min(minLength, readLength);
            maxLength = Math.max(maxLength, readLength);

            // adjust the min/max length of across all files
            minReadLength = Math.min(minReadLength, readLength);
            maxReadLength = Math.max(maxReadLength, readLength);
        }

        stream.printf("Average bytes per entry: %f%n", divide(size, allQueryIndices.count()));
        stream.printf("Average bytes per base: %f%n", divide(size, cumulativeReadLength));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    final int numReadEntries = allQueryIndices.count();
    stream.printf("Has identifiers = %s (%,d) %n", numberOfIdentifiers > 0, numberOfIdentifiers);
    stream.printf("Has descriptions = %s (%,d) %n", numberOfDescriptions > 0, numberOfDescriptions);
    stream.printf("Has sequences = %s (%,d) %n", numberOfSequences > 0, numberOfSequences);
    stream.printf("Has sequencePairs = %s (%,d) %n", numberOfSequencePairs > 0, numberOfSequencePairs);
    stream.printf("Has quality scores = %s (%,d) %n", numberOfQualityScores > 0, numberOfQualityScores);
    stream.printf("Has quality score Pairs = %s (%,d) %n", numberOfQualityScorePairs > 0,
            numberOfQualityScorePairs);

    stream.printf("Number of entries = %,d%n", numReadEntries);
    stream.printf("Min read length = %,d%n", numReadEntries > 0 ? minLength : 0);
    stream.printf("Max read length = %,d%n", numReadEntries > 0 ? maxLength : 0);
    stream.printf("Min quality length = %,d%n", numberOfQualityScores > 0 ? minQualityLength : 0);
    stream.printf("Max quality length = %,d%n", numberOfQualityScores > 0 ? maxQualityLength : 0);
    stream.printf("Avg read length = %,d%n", numReadEntries > 0 ? totalReadLength / numReadEntries : 0);
    stream.printf("Avg read pair length = %,d%n",
            numReadEntries > 0 ? totalReadLengthPair / numReadEntries : 0);

    // compute quantiles
    if (computeQuantiles) {
        final Percentile percentile = new Percentile();
        final double[] increasingReadLengths = readLengths.toDoubleArray();
        Arrays.sort(increasingReadLengths);
        stream.printf("Read length quantiles = [ ");
        for (int quantile = 1; quantile < numberOfQuantiles + 1; quantile++) {
            stream.printf("%,f ", percentile.evaluate(increasingReadLengths, quantile));
        }
        stream.printf("]%n");
    }
}
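
Percentile does not require pre-sorted input (it works on an internal copy of the data), so the explicit Arrays.sort above is not strictly necessary; note also that the loop passes the raw quantile index (1, 2, ...) to evaluate, which interprets it as a percentage in (0, 100]. For reference, a minimal sketch of the same reporting pattern with invented read lengths and explicit decile percentages:

// Sketch: decile report for a batch of read lengths (sample values are made up).
final double[] readLengths = { 35, 36, 50, 75, 75, 100, 101, 150, 250, 300 };
final Percentile percentile = new Percentile();
final StringBuilder quantileReport = new StringBuilder("Read length quantiles = [ ");
for (int p = 10; p <= 90; p += 10) {
    quantileReport.append(String.format("%,f ", percentile.evaluate(readLengths, p)));
}
quantileReport.append("]");
System.out.println(quantileReport);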

From source file: com.atolcd.pentaho.di.trans.steps.gisgroupby.GisGroupBy.java

/**
 * Used for JUnit tests in GroupByAggregationNullsTest
 *
 * @return
 * @throws KettleValueException
 */
Object[] getAggregateResult() throws KettleValueException {
    Object[] result = new Object[data.subjectnrs.length];

    if (data.subjectnrs != null) {
        for (int i = 0; i < data.subjectnrs.length; i++) {
            Object ag = data.agg[i];
            switch (meta.getAggregateType()[i]) {
            case GisGroupByMeta.TYPE_GROUP_SUM:
                break;
            case GisGroupByMeta.TYPE_GROUP_AVERAGE:
                ag = ValueDataUtil.divide(data.aggMeta.getValueMeta(i), ag,
                        new ValueMeta("c", ValueMetaInterface.TYPE_INTEGER), new Long(data.counts[i]));
                break;
            case GisGroupByMeta.TYPE_GROUP_MEDIAN:
            case GisGroupByMeta.TYPE_GROUP_PERCENTILE:
                double percentile = 50.0;
                if (meta.getAggregateType()[i] == GisGroupByMeta.TYPE_GROUP_PERCENTILE) {
                    percentile = Double.parseDouble(meta.getValueField()[i]);
                }
                @SuppressWarnings("unchecked")
                List<Double> valuesList = (List<Double>) data.agg[i];
                double[] values = new double[valuesList.size()];
                for (int v = 0; v < values.length; v++) {
                    values[v] = valuesList.get(v);
                }
                ag = new Percentile().evaluate(values, percentile);
                break;
            case GisGroupByMeta.TYPE_GROUP_COUNT_ANY:
            case GisGroupByMeta.TYPE_GROUP_COUNT_ALL:
                ag = new Long(data.counts[i]);
                break;
            case GisGroupByMeta.TYPE_GROUP_COUNT_DISTINCT:
                break;
            case GisGroupByMeta.TYPE_GROUP_MIN:
                break;
            case GisGroupByMeta.TYPE_GROUP_MAX:
                break;
            case GisGroupByMeta.TYPE_GROUP_STANDARD_DEVIATION:
                double sum = (Double) ag / data.counts[i];
                ag = Double.valueOf(Math.sqrt(sum));
                break;
            case GisGroupByMeta.TYPE_GROUP_CONCAT_COMMA:
            case GisGroupByMeta.TYPE_GROUP_CONCAT_STRING:
                ag = ((StringBuilder) ag).toString();
                break;

            // GIS: union of the geometries
            case GisGroupByMeta.TYPE_GROUP_GEOMETRY_UNION:

                Geometry geomUnionGroup = ((GeometryInterface) data.aggMeta.getValueMeta(i)).getGeometry(ag);
                UnaryUnionOp unionOperator = new UnaryUnionOp(geomUnionGroup);
                Geometry geometryUnion = unionOperator.union();
                geometryUnion = GeometryUtils.getMergedGeometry(geometryUnion);
                geometryUnion.setSRID(geometrySRID);
                ag = geometryUnion;
                break;

            // GIS: extent (envelope) of the geometries
            case GisGroupByMeta.TYPE_GROUP_GEOMETRY_EXTENT:

                Geometry geomExtentGroup = ((GeometryInterface) data.aggMeta.getValueMeta(i)).getGeometry(ag);
                Geometry geometryExtent = geomExtentGroup.getEnvelope();
                geometryExtent.setSRID(geometrySRID);
                ag = geometryExtent;
                break;

            // GIS: aggregation of the geometries
            case GisGroupByMeta.TYPE_GROUP_GEOMETRY_AGG:

                Geometry geomAggGroup = ((GeometryInterface) data.aggMeta.getValueMeta(i)).getGeometry(ag);
                Geometry geometryAgg = geomAggGroup;
                geometryAgg.setSRID(geometrySRID);
                ag = geometryAgg;
                break;

            default:
                break;
            }
            /*
             * if ( ag == null && allNullsAreZero ) { // PDI-10250, 6960
             * seems all rows for min function was nulls... // get output
             * subject meta based on original subject meta calculation
             * ValueMetaInterface vm = data.aggMeta.getValueMeta( i );
             * 
             * ag = ValueDataUtil.getZeroForValueMetaType( vm ); }
             */
            result[i] = ag;
        }
    }
    return result;

}
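
The median/percentile branch above boils down to unboxing the accumulated List<Double> into a double[] and handing it, together with a percentage, to a fresh Percentile. A minimal sketch of that aggregation step in isolation (valuesList and the 75.0 cut point are invented stand-ins; java.util.Arrays and java.util.List are assumed imported):

// Sketch of the TYPE_GROUP_PERCENTILE / TYPE_GROUP_MEDIAN aggregation in isolation.
final List<Double> valuesList = Arrays.asList(4.0, 8.0, 15.0, 16.0, 23.0, 42.0);
final double percentile = 75.0; // 50.0 would reproduce the median case

final double[] values = new double[valuesList.size()];
for (int v = 0; v < values.length; v++) {
    values[v] = valuesList.get(v);
}
final Object ag = new Percentile().evaluate(values, percentile);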

From source file: org.drugepi.hdps.ZBiasCalculator.java

public static void scoreVariables(List<HdpsVariable> variableList) {
    // copy variables list
    List<HdpsVariable> expSortVariableList = new ArrayList<HdpsVariable>();
    List<HdpsVariable> outcomeSortVariableList = new ArrayList<HdpsVariable>();

    for (HdpsVariable var : variableList) {
        var.zBiasScore = 0;

        if ((var.expAssocRankingVariable != HdpsVariable.INVALID)
                && (var.outcomeAssocRankingVariable != HdpsVariable.INVALID)) {
            expSortVariableList.add(var);
            outcomeSortVariableList.add(var);
        }
    }

    // sort variables by exposure association (strongest first) 
    Collections.sort(expSortVariableList, new HdpsVariableReverseExposureAssociationComparator());

    // sort variables by outcome association (weakest first) 
    Collections.sort(outcomeSortVariableList, new HdpsVariableReverseOutcomeAssociationComparator());
    Collections.reverse(outcomeSortVariableList);

    // create an array of outcome strengths
    double[] outcomeStrengths = new double[outcomeSortVariableList.size()];
    for (int i = 0; i < outcomeStrengths.length; i++)
        outcomeStrengths[i] = outcomeSortVariableList.get(i).outcomeAssocRankingVariable;

    // startsOfQuintile (declared below) will store the breaks between quintiles;
    // first, find the median of outcome strength
    Percentile pctile = new Percentile();

    // Find quintiles 1 through 5 of outcome weakness
    // AMONG the weakest half of the variables.
    // List is sorted strongest first, so the weakest variables 
    // will be at the end
    // quintile 1 = weakest 
    // don't use startsOfQuintile[0]

    double median = pctile.evaluate(outcomeStrengths, 50.0);
    int searchCeiling = Arrays.binarySearch(outcomeStrengths, median);
    if (searchCeiling < 0)
        searchCeiling = -(searchCeiling + 1);

    int startsOfQuintile[] = new int[7];

    for (int quintile = 1; quintile <= 5; quintile++) {
        // find the probability that *begins* this quintile
        double p = (quintile - 1) * 20;
        if (p > 0) {
            double quintileStartP = pctile.evaluate(outcomeStrengths, 0, searchCeiling, p);

            startsOfQuintile[quintile] = Arrays.binarySearch(outcomeStrengths, quintileStartP);
            if (startsOfQuintile[quintile] < 0)
                startsOfQuintile[quintile] = -(startsOfQuintile[quintile] + 1);
        } else
            startsOfQuintile[quintile] = 0;
    }
    startsOfQuintile[6] = searchCeiling;

    // score the variables, BUT make quintile 5 the weakest
    for (int quintile = 1; quintile <= 5; quintile++) {
        for (int i = startsOfQuintile[quintile]; i < startsOfQuintile[quintile + 1]; i++) {
            HdpsVariable v = outcomeSortVariableList.get(i);
            v.zBiasScore = 6 - quintile;
        }
    }

    //      for (HdpsVariable v: outcomeSortVariableList) {
    //         System.out.printf("%s    %1.4f    %d\n", v.varName, v.outcomeAssocRankingVariable, v.zBiasScore);
    //      }
}
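
The call pctile.evaluate(outcomeStrengths, 0, searchCeiling, p) uses the four-argument overload, which computes the percentile over a slice of the array only; here that slice is the weakest half, i.e. the elements below the median. A minimal sketch of the overload on its own, with an invented (already sorted) strengths array:

// Sketch: percentile restricted to a prefix of a sorted array, as in the quintile search above.
final double[] strengths = { 0.1, 0.2, 0.3, 0.5, 0.8, 1.3, 2.1, 3.4 };
final Percentile pctile = new Percentile();

final double median = pctile.evaluate(strengths, 50.0);
int searchCeiling = Arrays.binarySearch(strengths, median);
if (searchCeiling < 0) {
    searchCeiling = -(searchCeiling + 1);
}

// 40th percentile of the weakest half only (indices 0 .. searchCeiling - 1).
final double quintileStart = pctile.evaluate(strengths, 0, searchCeiling, 40.0);
System.out.printf("median=%f, p40 of weakest half=%f%n", median, quintileStart);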

From source file: org.pentaho.di.trans.steps.groupby.GroupBy.java

/**
 * Used for JUnit tests in GroupByAggregationNullsTest
 * @return
 * @throws KettleValueException
 */
Object[] getAggregateResult() throws KettleValueException {
    Object[] result = new Object[data.subjectnrs.length];

    if (data.subjectnrs != null) {
        for (int i = 0; i < data.subjectnrs.length; i++) {
            Object ag = data.agg[i];
            switch (meta.getAggregateType()[i]) {
            case GroupByMeta.TYPE_GROUP_SUM:
                break;
            case GroupByMeta.TYPE_GROUP_AVERAGE:
                ag = ValueDataUtil.divide(data.aggMeta.getValueMeta(i), ag,
                        new ValueMeta("c", ValueMetaInterface.TYPE_INTEGER), new Long(data.counts[i]));
                break;
            case GroupByMeta.TYPE_GROUP_MEDIAN:
            case GroupByMeta.TYPE_GROUP_PERCENTILE:
                double percentile = 50.0;
                if (meta.getAggregateType()[i] == GroupByMeta.TYPE_GROUP_PERCENTILE) {
                    percentile = Double.parseDouble(meta.getValueField()[i]);
                }
                @SuppressWarnings("unchecked")
                List<Double> valuesList = (List<Double>) data.agg[i];
                double[] values = new double[valuesList.size()];
                for (int v = 0; v < values.length; v++) {
                    values[v] = valuesList.get(v);
                }
                ag = new Percentile().evaluate(values, percentile);
                break;
            case GroupByMeta.TYPE_GROUP_COUNT_ANY:
            case GroupByMeta.TYPE_GROUP_COUNT_ALL:
                ag = new Long(data.counts[i]);
                break;
            case GroupByMeta.TYPE_GROUP_COUNT_DISTINCT:
                break;
            case GroupByMeta.TYPE_GROUP_MIN:
                break;
            case GroupByMeta.TYPE_GROUP_MAX:
                break;
            case GroupByMeta.TYPE_GROUP_STANDARD_DEVIATION:
                double sum = (Double) ag / data.counts[i];
                ag = Double.valueOf(Math.sqrt(sum));
                break;
            case GroupByMeta.TYPE_GROUP_CONCAT_COMMA:
            case GroupByMeta.TYPE_GROUP_CONCAT_STRING:
                ag = ((StringBuilder) ag).toString();
                break;
            default:
                break;
            }
            if (ag == null && allNullsAreZero) {
                // PDI-10250, 6960 seems all rows for min function was nulls...
                // get output subject meta based on original subject meta calculation
                ValueMetaInterface vm = data.aggMeta.getValueMeta(i);
                ag = ValueDataUtil.getZeroForValueMetaType(vm);
            }
            result[i] = ag;
        }
    }
    return result;

}

From source file: org.pentaho.di.trans.steps.memgroupby.MemoryGroupBy.java

/**
 * Used for JUnit tests in MemoryGroupByAggregationNullsTest
 * @param aggregate
 * @return
 * @throws KettleValueException
 */
Object[] getAggregateResult(Aggregate aggregate) throws KettleValueException {
    Object[] result = new Object[data.subjectnrs.length];

    if (data.subjectnrs != null) {
        for (int i = 0; i < data.subjectnrs.length; i++) {
            Object ag = aggregate.agg[i];
            switch (meta.getAggregateType()[i]) {
            case MemoryGroupByMeta.TYPE_GROUP_SUM:
                break;
            case MemoryGroupByMeta.TYPE_GROUP_AVERAGE:
                ag = ValueDataUtil.divide(data.aggMeta.getValueMeta(i), ag,
                        new ValueMeta("c", ValueMetaInterface.TYPE_INTEGER), new Long(aggregate.counts[i]));
                break;
            case MemoryGroupByMeta.TYPE_GROUP_MEDIAN:
            case MemoryGroupByMeta.TYPE_GROUP_PERCENTILE:
                double percentile = 50.0;
                if (meta.getAggregateType()[i] == MemoryGroupByMeta.TYPE_GROUP_PERCENTILE) {
                    percentile = Double.parseDouble(meta.getValueField()[i]);
                }
                @SuppressWarnings("unchecked")
                List<Double> valuesList = (List<Double>) aggregate.agg[i];
                double[] values = new double[valuesList.size()];
                for (int v = 0; v < values.length; v++) {
                    values[v] = valuesList.get(v);
                }
                ag = new Percentile().evaluate(values, percentile);
                break;
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_ANY:
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL:
                ag = new Long(aggregate.counts[i]);
                break;
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_DISTINCT:
                break;
            case MemoryGroupByMeta.TYPE_GROUP_MIN:
                break;
            case MemoryGroupByMeta.TYPE_GROUP_MAX:
                break;
            case MemoryGroupByMeta.TYPE_GROUP_STANDARD_DEVIATION:
                double sum = (Double) ag / aggregate.counts[i];
                ag = Double.valueOf(Math.sqrt(sum));
                break;
            case MemoryGroupByMeta.TYPE_GROUP_CONCAT_COMMA:
            case MemoryGroupByMeta.TYPE_GROUP_CONCAT_STRING:
                ag = ((StringBuilder) ag).toString();
                break;
            default:
                break;
            }
            if (ag == null && allNullsAreZero) {
                // PDI-11530 seems all rows for min function was nulls...
                ValueMetaInterface vm = data.aggMeta.getValueMeta(i);
                ag = ValueDataUtil.getZeroForValueMetaType(vm);
            }
            result[i] = ag;
        }
    }

    return result;

}