Example usage for org.apache.commons.math3.distribution HypergeometricDistribution upperCumulativeProbability

Introduction

In this page you can find the example usage for org.apache.commons.math3.distribution HypergeometricDistribution upperCumulativeProbability.

Prototype

public double upperCumulativeProbability(int x)

Source Link

Document

For this distribution, X , this method returns P(X >= x) .

Usage

From source file:edu.sdsc.scigraph.analyzer.HyperGeometricAnalyzer.java

public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);

        Set<AnalyzerResult> sampleSetNodes = getSampleSetNodes(processedRequest);

        Set<AnalyzerResult> completeSetNodes = getCompleteSetNodes(processedRequest);

        int totalCount = getTotalCount(processedRequest.getOntologyClass());

        // apply the HyperGeometricDistribution for each node
        for (AnalyzerResult n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount());
            pValues.add(new AnalyzerResult(n.getNodeId(), p));
        }/*w  w w  . j  a  va2 s. c om*/

        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());

        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}

From source file:io.scigraph.analyzer.HyperGeometricAnalyzer.java

public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);

        Set<AnalyzerInnerNode> sampleSetNodes = getSampleSetNodes(processedRequest);

        Set<AnalyzerInnerNode> completeSetNodes = getCompleteSetNodes(processedRequest);

        double bonferroniCoeff = computeBonferroniCoeff(completeSetNodes);

        int totalCount = getTotalCount(processedRequest.getOntologyClass());

        // apply the HyperGeometricDistribution for each node
        for (AnalyzerInnerNode n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount())
                    * bonferroniCoeff;/*w w w . ja  v  a 2  s.  c om*/
            String iri = graph.getNodeProperty(n.getNodeId(), CommonProperties.IRI, String.class).get();
            String curie = curieUtil.getCurie(iri).or(iri);
            String labels = StringUtils
                    .join(graph.getNodeProperties(n.getNodeId(), NodeProperties.LABEL, String.class), ", ");
            pValues.add(new AnalyzerResult(labels, curie, p));
        }

        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());

        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}

From source file:it.iit.genomics.cru.simsearch.bundle.utils.AnnotationsFromTrack.java

public void process(String trackId, Collection<TopkResult> results) {

    // Chromosome n

    // for each annotation

    // for each result
    // for each region
    // if overlapp
    // --> add annotation
    // next annotation
    // next result.

    annotationsCount = new HashMap<>();
    annotationsMappedCount = new HashMap<>();

    // Order topkresults
    HashMap<String, ArrayList<Region>> orderedRegions = new HashMap<>();

    logger.info("Order regions");

    try {//from w  ww.  java  2 s. co  m

        for (TopkResult result : results) {
            for (String datasetId : result.getPositiveMatchDatasetIds()) {
                if (result.getDataset(datasetId) == null) {
                    continue;
                }

                if (result.getDataset(datasetId).getRegions() == null) {
                    continue;
                }

                for (Region region : result.getDataset(datasetId).getRegions()) {

                    if (region == null) {
                        logger.info("null region");
                        continue;
                    }

                    if (region.getChromosome() == null) {
                        logger.info("null chromosome");
                        continue;
                    }

                    ArrayList<Region> regions = orderedRegions.get(region.getChromosome());

                    if (regions == null) {
                        regions = new ArrayList<Region>();
                        orderedRegions.put(region.getChromosome(), regions);
                    }

                    int i = 0;
                    for (; i < regions.size(); i++) {
                        if (regions.get(i).getLeft() > region.getLeft()) {
                            break;
                        }
                    }
                    regions.add(i, region);
                }
            }
        }

    } catch (Exception e) {
        logger.severe(e.getMessage());
        e.printStackTrace();
    }

    logger.info("Regions ordered");

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream()).filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {

                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);

                                if (regions != null) {

                                    // Iterator<Region> regionIt = regions.iterator();

                                    // Region currentResult = regionIt.next();
                                    // SeqSpan span = null;

                                    try {

                                        List<? extends SeqSymmetry> symmetries = symLoader
                                                .getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {

                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();

                                            while (annotationSymIterator.hasNext()) {
                                                String annotation = null;
                                                SeqSymmetry seqSym = annotationSymIterator.next();
                                                // span = seqSym.getSpan(0);

                                                if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                    annotation = ((SymWithProps) seqSym).getProperty("name")
                                                            .toString();
                                                } else {
                                                    // find the first text annotation
                                                    for (Object propertyValue : ((SymWithProps) seqSym)
                                                            .getProperties().values()) {
                                                        String s = propertyValue.toString();
                                                        if (s.matches("[A-Za-z]")) {
                                                            annotation = s;
                                                            break;
                                                        }
                                                    }
                                                }

                                                if (null != annotation) {
                                                    increase(annotationsCount, annotation);
                                                }

                                            }
                                        }
                                    } catch (Exception e) {
                                        logger.severe("pre-load annotations: " + bioSeq.getId());
                                    }
                                }
                            });
                        });
            });

    for (String annotation : annotationsCount.keySet()) {
        logger.severe("Annotation: " + annotation + " : " + annotationsCount.get(annotation));
    }

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream()).filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {

                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);

                                if (regions != null) {

                                    Iterator<Region> regionIt = regions.iterator();

                                    Region currentResult = regionIt.next();
                                    SeqSpan span = null;

                                    String annotation = null;

                                    try {

                                        List<? extends SeqSymmetry> symmetries = symLoader
                                                .getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {

                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();

                                            // logger.info("Iterator
                                            // ready");

                                            boolean nextResult = false;
                                            boolean nextAnnotation = true;

                                            while (nextResult || nextAnnotation) {

                                                if (nextAnnotation) {
                                                    // if (false == annotationSymIterator.hasNext()) {
                                                    // /**
                                                    // * Finish to read the matchings
                                                    // */
                                                    // break;
                                                    // }

                                                    SeqSymmetry seqSym = annotationSymIterator.next();

                                                    if (seqSym.getSpanCount() == 0) {
                                                        continue;
                                                    }

                                                    if (false == SymWithProps.class.isInstance(seqSym)) {
                                                        continue;
                                                    }

                                                    span = seqSym.getSpan(0);

                                                    if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                        annotation = ((SymWithProps) seqSym).getProperty("name")
                                                                .toString();
                                                    } else {
                                                        // find the first text annotation
                                                        for (Object propertyValue : ((SymWithProps) seqSym)
                                                                .getProperties().values()) {
                                                            String s = propertyValue.toString();
                                                            if (s.matches("[A-Za-z]")) {
                                                                annotation = s;
                                                                break;
                                                            }
                                                        }
                                                    }

                                                    // if (null != annotation) {
                                                    // increase(annotationsCount, annotation);
                                                    // }
                                                    if (span.getMax() < currentResult.getLeft()) {
                                                        continue;
                                                    }

                                                    nextAnnotation = false;
                                                }

                                                if (nextResult) {
                                                    if (false == regionIt.hasNext()) {

                                                        break;
                                                    }
                                                    currentResult = regionIt.next();

                                                    if (currentResult.getRight() < span.getMin()) {
                                                        continue;
                                                    }

                                                    nextResult = false;

                                                }

                                                if (overlap(span, currentResult)) {
                                                    increase(annotationsMappedCount, annotation);
                                                    matchingMappedByAnnotationType.put(annotation,
                                                            currentResult.toString());
                                                    // next annotation
                                                    nextAnnotation = true;
                                                    nextResult = false;
                                                } else if (currentResult.getRight() < span.getMin()) {
                                                    nextResult = true;
                                                    nextAnnotation = false;
                                                } else {
                                                    nextResult = false;
                                                    nextAnnotation = true;
                                                    // nextResult =
                                                    // true;
                                                }
                                            }

                                        }

                                    } catch (Exception ex) {
                                        logger.severe(ex.getMessage() + ". Feature2 "
                                                + symLoader.getFeatureName() + ", " + bioSeq.getId());
                                        ex.printStackTrace();
                                    }
                                }
                                // else {
                                // GCount annotations anyway
                                /**
                                 * Finish to read the annotations
                                 */
                                // try{
                                // if (symLoader != null && bioSeq != null) {
                                // List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                // if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                // && symmetries != null && symmetries.size() > 0) {

                                // Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                // .iterator();

                                // while (annotationSymIterator.hasNext()) {
                                // SeqSymmetry seqSym = annotationSymIterator.next();

                                // if (seqSym.getSpanCount() == 0) {
                                // continue;
                                // }

                                // if (false == SymWithProps.class.isInstance(seqSym)) {
                                // continue;
                                // }

                                // SeqSpan span = seqSym.getSpan(0);
                                // String annotation = null;
                                // if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                // annotation = ((SymWithProps) seqSym).getProperty("name")
                                // .toString();
                                // } else {
                                // // find the first text annotation
                                // for (Object propertyValue : ((SymWithProps) seqSym)
                                // .getProperties().values()) {
                                // String s = propertyValue.toString();
                                // if (s.matches("[A-Za-z]")) {
                                // annotation = s;
                                // break;
                                // }
                                // }
                                // }

                                // if (null != annotation) {
                                // increase(annotationsCount, annotation);
                                // }
                                // }}
                                // /**
                                // * FINISHED READ ALL ANNOTATIONS
                                // */
                                // }
                                // } catch (Exception ex) {
                                // logger.severe(ex.getMessage() + ". Feature3 " + symLoader.getFeatureName()
                                // + ", " + bioSeq.getId());
                                // // ex.printStackTrace();
                                // }
                                // }
                            });
                        });
            });

    /**
     * Get pvalues
     */

    int populationSize = this.getNumberOfAnnotations();
    int sampleSize = this.getNumberOfAnnotationsMapped();

    for (String annotation : this.getAnnotations()) {
        int numberOfSuccesses = this.getNumberOfAnnotations(annotation);

        HypergeometricDistribution hd = new HypergeometricDistribution(populationSize, numberOfSuccesses,
                sampleSize);

        double probability = hd.upperCumulativeProbability(this.getNumberOfAnnotationsMapped(annotation));
        this.pValues.put(annotation, probability);
        logger.severe("pvalue: " + numberOfSuccesses + " | " + sampleSize + " | " + populationSize + " | "
                + this.getNumberOfAnnotationsMapped(annotation) + " = " + probability);
    }

    logger.info("Num annotations: " + this.getAnnotations().size() + " / " + annotationsCount.keySet().size());

}

From source file:org.intermine.web.logic.widget.EnrichmentCalculation.java

/**
 * Perform an enrichment calculation based on input from some source and return results that
 * include a p-value and count for each attribute that was observed in the sample.  Also perform
 * the type multiple hypothesis error correction specified before returning the results.
 * @param input details of the sample and population
 * @param maxValue the maximum p-value to return, for display purposes
 * @param errorCorrection the type of error correction to perform or None
 * @param extraCorrectionCoefficient if true correction coefficient has been selected
 * @param correctionCoefficient a instance of correction coefficient
 * @return results of the enrichment calculation
 *//*w  ww.  j a  va  2  s .c  o  m*/
public static EnrichmentResults calculate(EnrichmentInput input, Double maxValue, String errorCorrection,
        boolean extraCorrectionCoefficient, CorrectionCoefficient correctionCoefficient) {

    int sampleSize = input.getSampleSize();
    PopulationInfo population = input.getPopulationInfo();
    int populationSize = population.getSize();

    Map<String, Integer> sampleCounts = input.getAnnotatedCountsInSample();
    Map<String, PopulationInfo> annotatedPopulationInfo = input.getAnnotatedCountsInPopulation();

    Map<String, BigDecimal> rawResults = new HashMap<String, BigDecimal>();
    for (Map.Entry<String, Integer> entry : sampleCounts.entrySet()) {
        String attribute = entry.getKey();

        Integer sampleCount = entry.getValue();
        PopulationInfo pi = annotatedPopulationInfo.get(attribute);
        Integer populationCount = (pi != null) ? pi.getSize() : 0;

        HypergeometricDistribution h = new HypergeometricDistribution(populationSize, populationCount,
                sampleSize);
        Double pValue = h.upperCumulativeProbability(sampleCount);
        rawResults.put(attribute, new BigDecimal(pValue));
    }

    Map<String, BigDecimal> correctedResults = ErrorCorrection.adjustPValues(errorCorrection, rawResults,
            maxValue, input.getTestCount());
    if (extraCorrectionCoefficient && correctionCoefficient.isApplicable()) {
        correctionCoefficient.apply(correctedResults, population, annotatedPopulationInfo, maxValue);
    }
    Map<String, BigDecimal> sortedCorrectedResults = ErrorCorrection.sortMap(correctedResults);
    // record the number of items in the sample that had any values for the attribute
    int widgetTotal = rawResults.isEmpty() ? 0 : sampleSize;

    EnrichmentResults results = new EnrichmentResults(sortedCorrectedResults,
            input.getAnnotatedCountsInSample(), input.getLabels(), widgetTotal);

    return results;
}

From source file:ubc.pavlab.gotrack.beans.EnrichmentView.java

private void enrichmentAnalysis(Map<Edition, Map<Gene, Set<GeneOntologyTerm>>> geneGOMap,
        Map<Edition, Set<GeneOntologyTerm>> allTerms, int minAnnotedPopulation) {

    enrichmentData = new HashMap<>();
    boolean emptyChart = true;

    List<Edition> eds = new ArrayList<Edition>(geneGOMap.keySet());
    Collections.sort(eds, Collections.reverseOrder());
    boolean firstRun = true;
    Set<GeneOntologyTerm> termsToEnrich = new HashSet<>();

    for (Edition ed : eds) {

        Map<Gene, Set<GeneOntologyTerm>> data = geneGOMap.get(ed);

        int populationSize = cache.getGenePopulation(currentSpeciesId, ed);
        int k = data.keySet().size();

        Set<GeneOntologyTerm> termsInEdition = allTerms.get(ed);

        // // TODO Multiple tests correction lowered because we're testing less terms?
        // if ( !firstRun ) {
        // termsInEdition = new HashSet<>( termsInEdition );
        // termsInEdition.retainAll( termsToEnrich );
        // }//from   ww w.ja  v  a  2 s .  c  o  m

        Map<GeneOntologyTerm, Double> enrich = enrichmentData.get(ed);

        if (enrich == null) {
            enrich = new HashMap<>();
            enrichmentData.put(ed, enrich);
        }

        if (termsInEdition != null) {
            int bcorr = termsInEdition.size();
            for (GeneOntologyTerm term : termsInEdition) {

                if (!firstRun && !termsToEnrich.contains(term)) {
                    // Term is part of this edition however did not pass the threshold in the current edition
                    continue;
                }

                Integer populationAnnotated = cache.getGoSetSizes(currentSpeciesId, ed.getEdition(),
                        term.getGoId());

                if (populationAnnotated != null && populationAnnotated >= minAnnotedPopulation) {
                    HypergeometricDistribution hyper = new HypergeometricDistribution(populationSize,
                            populationAnnotated, k);

                    int r = 0;

                    for (Entry<Gene, Set<GeneOntologyTerm>> geneEntry : data.entrySet()) {
                        if (geneEntry.getValue().contains(term)) {
                            r++;
                        }
                    }

                    // log.info( Arrays.asList( populationSize, populationAnnotated, k, r ) );
                    Double res = hyper.upperCumulativeProbability(r);

                    if (bonferroniCorrection) {
                        res = bcorr * res;
                    }

                    res = Math.min(res, 1);

                    if (!firstRun || res <= pThreshold) {
                        enrich.put(term, res);
                        emptyChart = false;
                        if (firstRun) {
                            termsToEnrich.add(term);
                        }
                    } else {
                        // log.info( term + " skipped; p-value not in range" );
                    }

                    // log.info( "Edition: (" + ed.getEdition() + ") " + hyper.upperCumulativeProbability( r ) );

                } else {
                    // log.info( term + " skipped; too small or isn't annotated" );
                }
            }
        }
        firstRun = false;

    }

    if (!emptyChart) {
        enrichmentChart = createChart(enrichmentData, "Enrichment Stability", "Date", "p-value");
        chartReady = true;
        log.info("Chart created");
    } else {
        chartReady = false;
        log.info("Chart Empty");
    }

}