List of usage examples for org.apache.commons.math3.distribution.HypergeometricDistribution
public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize) throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException
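Before the source-file examples, here is a minimal, self-contained sketch of how the constructor is typically used; the parameter values are made up for illustration (an urn of 50 items, 10 of which are "successes", from which 5 are drawn without replacement):

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class HypergeometricExample {
    public static void main(String[] args) {
        // Hypothetical urn: population of 50, 10 successes, sample of 5 drawn without replacement.
        HypergeometricDistribution dist = new HypergeometricDistribution(50, 10, 5);

        System.out.println(dist.probability(2));                // P(X = 2)
        System.out.println(dist.cumulativeProbability(2));      // P(X <= 2)
        System.out.println(dist.upperCumulativeProbability(2)); // P(X >= 2)
        System.out.println(dist.getNumericalMean());            // n * K / N = 5 * 10 / 50 = 1.0
    }
}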
From source file:jasima.core.random.discrete.IntHypergeometric.java
public IntHypergeometric(int populationSize, int numberOfSuccesses, int sampleSize) {
    super();
    setDistribution(new HypergeometricDistribution(populationSize, numberOfSuccesses, sampleSize));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The total population size.
 *
 * @param populationSize
 *            The value for the population size.
 * @throws NotStrictlyPositiveException
 *             If the supplied value was {@code <=0}.
 */
public void setPopulationSize(int populationSize) throws NotStrictlyPositiveException {
    setDistribution(
            new HypergeometricDistribution(populationSize, dist.getNumberOfSuccesses(), dist.getSampleSize()));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The number of possible successes in the population.
 *
 * @param numberOfSuccesses
 *            The number of successes.
 * @throws NumberIsTooLargeException
 *             If the supplied value is larger than {@code populationSize}.
 */
public void setNumberOfSuccesses(int numberOfSuccesses) throws NumberIsTooLargeException {
    setDistribution(
            new HypergeometricDistribution(dist.getPopulationSize(), numberOfSuccesses, dist.getSampleSize()));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The number of samples taken.
 *
 * @param sampleSize
 *            The number of trials conducted.
 * @throws NumberIsTooLargeException
 *             If the supplied value is larger than {@code populationSize}.
 */
public void setSampleSize(int sampleSize) throws NumberIsTooLargeException {
    setDistribution(
            new HypergeometricDistribution(dist.getPopulationSize(), dist.getNumberOfSuccesses(), sampleSize));
}
From source file:edu.sdsc.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);
        Set<AnalyzerResult> sampleSetNodes = getSampleSetNodes(processedRequest);
        Set<AnalyzerResult> completeSetNodes = getCompleteSetNodes(processedRequest);
        int totalCount = getTotalCount(processedRequest.getOntologyClass());
        // apply the HypergeometricDistribution for each node
        for (AnalyzerResult n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount());
            pValues.add(new AnalyzerResult(n.getNodeId(), p));
        }
        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());
        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}
From source file:io.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);
        Set<AnalyzerInnerNode> sampleSetNodes = getSampleSetNodes(processedRequest);
        Set<AnalyzerInnerNode> completeSetNodes = getCompleteSetNodes(processedRequest);
        double bonferroniCoeff = computeBonferroniCoeff(completeSetNodes);
        int totalCount = getTotalCount(processedRequest.getOntologyClass());
        // apply the HypergeometricDistribution for each node
        for (AnalyzerInnerNode n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount()) * bonferroniCoeff;
            String iri = graph.getNodeProperty(n.getNodeId(), CommonProperties.IRI, String.class).get();
            String curie = curieUtil.getCurie(iri).or(iri);
            String labels = StringUtils
                    .join(graph.getNodeProperties(n.getNodeId(), NodeProperties.LABEL, String.class), ", ");
            pValues.add(new AnalyzerResult(labels, curie, p));
        }
        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());
        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}
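Both analyze() variants reduce to the same core step: upperCumulativeProbability(k) returns P(X >= k), the probability of observing at least the sampled count under the hypergeometric null; the io.scigraph version additionally multiplies by a Bonferroni coefficient before reporting. A minimal sketch of that step in isolation, with made-up counts (the specific numbers are illustrative, not from either project):

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class EnrichmentSketch {
    public static void main(String[] args) {
        // Hypothetical counts: 1000 annotated nodes in total, 40 carry the term of interest,
        // 25 nodes in the sample set, 5 of which carry the term.
        HypergeometricDistribution dist = new HypergeometricDistribution(1000, 40, 25);
        double p = dist.upperCumulativeProbability(5); // P(X >= 5) under the null
        double bonferroniCoeff = 200.0;                // assumed number of tests, as in the io.scigraph variant
        double adjusted = p * bonferroniCoeff;         // the source multiplies the raw p-value by the coefficient
        System.out.println(p + " -> " + adjusted);
    }
}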
From source file:ch.unil.genescore.pathway.GeneSetLibrary.java
/**
 * Compute enrichment for the given set using hypergeometric distribution (magenta-option).
 * quantile refers to the cutoff used to assign ins and outs.
 */
protected double[] computeHypGeomPvalue2(double[] set, double[] totSet) {
    ArrayList<Double> quantiles = Pascal.set.hypGeomQuantiles_;
    double[] pvals = new double[quantiles.size()];
    if (set.length == 0) {
        for (int i = 0; i < quantiles.size(); i++) {
            pvals[i] = 1;
        }
        return pvals;
    }
    for (int j = 0; j < quantiles.size(); j++) {
        double[] valAr = totSet.clone();
        Arrays.sort(valAr);
        int n = (int) Math.floor(valAr.length * quantiles.get(j));
        double cutoffVal = valAr[n];
        int setSize = valAr.length - n - 1;
        int q = 0;
        for (double g : set) {
            if (g > cutoffVal)
                q += 1;
        }
        HypergeometricDistribution myHypgeom = new HypergeometricDistribution(valAr.length, setSize, set.length);
        double pval = 1 - myHypgeom.cumulativeProbability(q - 1);
        pvals[j] = pval;
    }
    return pvals;
}
From source file:ch.unil.genescore.pathway.GeneSetLibrary.java
protected void computeHypGeomPvalue(GeneSet set) {
    ArrayList<Double> quantiles = Pascal.set.hypGeomQuantiles_;
    double[] pvals = new double[quantiles.size()];
    if (set.getGenes().size() == 0) {
        for (int i = 0; i < quantiles.size(); i++) {
            pvals[i] = 1;
        }
        set.setHypGeomPvalues(pvals);
        return;
    }
    for (int j = 0; j < quantiles.size(); j++) {
        double[] valAr = new double[genesForSimulation_.size()];
        for (int i = 0; i < genesForSimulation_.size(); i++) {
            valAr[i] = genesForSimulation_.get(i).getChi2Stat();
        }
        Arrays.sort(valAr);
        int n = (int) Math.floor(valAr.length * quantiles.get(j));
        double cutoffVal = valAr[n];
        int setSize = valAr.length - n - 1;
        HashSet<Gene> pathwayGenes = set.getGenes();
        if (pathwayGenes.size() == 0)
            return;
        int q = 0;
        for (Gene g : pathwayGenes) {
            if (g.getChi2Stat() > cutoffVal)
                q += 1;
        }
        HypergeometricDistribution myHypgeom = new HypergeometricDistribution(valAr.length, setSize,
                pathwayGenes.size());
        double pval = 1 - myHypgeom.cumulativeProbability(q - 1);
        pvals[j] = pval;
    }
    set.setHypGeomPvalues(pvals);
}
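Both GeneSetLibrary methods report the upper tail of the same hypergeometric distribution. Restating the code above (no new assumptions beyond renaming): with N = valAr.length scored genes in total, K = setSize genes above the quantile cutoff, n genes in the tested set, and q of those observed above the cutoff, the reported value is

    p = P(X >= q) = 1 - F(q - 1) = \sum_{k=q}^{\min(K, n)} \binom{K}{k} \binom{N-K}{n-k} / \binom{N}{n}

so the expression 1 - myHypgeom.cumulativeProbability(q - 1) is mathematically equivalent to myHypgeom.upperCumulativeProbability(q).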
From source file:it.iit.genomics.cru.simsearch.bundle.utils.AnnotationsFromTrack.java
public void process(String trackId, Collection<TopkResult> results) {
    // Chromosome n
    // for each annotation
    // for each result
    // for each region
    // if overlap
    // --> add annotation
    // next annotation
    // next result.
    annotationsCount = new HashMap<>();
    annotationsMappedCount = new HashMap<>();

    // Order topkresults
    HashMap<String, ArrayList<Region>> orderedRegions = new HashMap<>();
    logger.info("Order regions");
    try {
        for (TopkResult result : results) {
            for (String datasetId : result.getPositiveMatchDatasetIds()) {
                if (result.getDataset(datasetId) == null) {
                    continue;
                }
                if (result.getDataset(datasetId).getRegions() == null) {
                    continue;
                }
                for (Region region : result.getDataset(datasetId).getRegions()) {
                    if (region == null) {
                        logger.info("null region");
                        continue;
                    }
                    if (region.getChromosome() == null) {
                        logger.info("null chromosome");
                        continue;
                    }
                    ArrayList<Region> regions = orderedRegions.get(region.getChromosome());
                    if (regions == null) {
                        regions = new ArrayList<Region>();
                        orderedRegions.put(region.getChromosome(), regions);
                    }
                    int i = 0;
                    for (; i < regions.size(); i++) {
                        if (regions.get(i).getLeft() > region.getLeft()) {
                            break;
                        }
                    }
                    regions.add(i, region);
                }
            }
        }
    } catch (Exception e) {
        logger.severe(e.getMessage());
        e.printStackTrace();
    }
    logger.info("Regions ordered");

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream())
                        .filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {
                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);
                                if (regions != null) {
                                    // Iterator<Region> regionIt = regions.iterator();
                                    // Region currentResult = regionIt.next();
                                    // SeqSpan span = null;
                                    try {
                                        List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {
                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();
                                            while (annotationSymIterator.hasNext()) {
                                                String annotation = null;
                                                SeqSymmetry seqSym = annotationSymIterator.next();
                                                // span = seqSym.getSpan(0);
                                                if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                    annotation = ((SymWithProps) seqSym).getProperty("name")
                                                            .toString();
                                                } else {
                                                    // find the first text annotation
                                                    for (Object propertyValue : ((SymWithProps) seqSym)
                                                            .getProperties().values()) {
                                                        String s = propertyValue.toString();
                                                        if (s.matches("[A-Za-z]")) {
                                                            annotation = s;
                                                            break;
                                                        }
                                                    }
                                                }
                                                if (null != annotation) {
                                                    increase(annotationsCount, annotation);
                                                }
                                            }
                                        }
                                    } catch (Exception e) {
                                        logger.severe("pre-load annotations: " + bioSeq.getId());
                                    }
                                }
                            });
                        });
            });

    for (String annotation : annotationsCount.keySet()) {
        logger.severe("Annotation: " + annotation + " : " + annotationsCount.get(annotation));
    }

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream())
                        .filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {
                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);
                                if (regions != null) {
                                    Iterator<Region> regionIt = regions.iterator();
                                    Region currentResult = regionIt.next();
                                    SeqSpan span = null;
                                    String annotation = null;
                                    try {
                                        List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {
                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();
                                            // logger.info("Iterator ready");
                                            boolean nextResult = false;
                                            boolean nextAnnotation = true;
                                            while (nextResult || nextAnnotation) {
                                                if (nextAnnotation) {
                                                    // if (false == annotationSymIterator.hasNext()) {
                                                    //     /**
                                                    //      * Finish to read the matchings
                                                    //      */
                                                    //     break;
                                                    // }
                                                    SeqSymmetry seqSym = annotationSymIterator.next();
                                                    if (seqSym.getSpanCount() == 0) {
                                                        continue;
                                                    }
                                                    if (false == SymWithProps.class.isInstance(seqSym)) {
                                                        continue;
                                                    }
                                                    span = seqSym.getSpan(0);
                                                    if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                        annotation = ((SymWithProps) seqSym).getProperty("name")
                                                                .toString();
                                                    } else {
                                                        // find the first text annotation
                                                        for (Object propertyValue : ((SymWithProps) seqSym)
                                                                .getProperties().values()) {
                                                            String s = propertyValue.toString();
                                                            if (s.matches("[A-Za-z]")) {
                                                                annotation = s;
                                                                break;
                                                            }
                                                        }
                                                    }
                                                    // if (null != annotation) {
                                                    //     increase(annotationsCount, annotation);
                                                    // }
                                                    if (span.getMax() < currentResult.getLeft()) {
                                                        continue;
                                                    }
                                                    nextAnnotation = false;
                                                }
                                                if (nextResult) {
                                                    if (false == regionIt.hasNext()) {
                                                        break;
                                                    }
                                                    currentResult = regionIt.next();
                                                    if (currentResult.getRight() < span.getMin()) {
                                                        continue;
                                                    }
                                                    nextResult = false;
                                                }
                                                if (overlap(span, currentResult)) {
                                                    increase(annotationsMappedCount, annotation);
                                                    matchingMappedByAnnotationType.put(annotation,
                                                            currentResult.toString());
                                                    // next annotation
                                                    nextAnnotation = true;
                                                    nextResult = false;
                                                } else if (currentResult.getRight() < span.getMin()) {
                                                    nextResult = true;
                                                    nextAnnotation = false;
                                                } else {
                                                    nextResult = false;
                                                    nextAnnotation = true;
                                                    // nextResult = true;
                                                }
                                            }
                                        }
                                    } catch (Exception ex) {
                                        logger.severe(ex.getMessage() + ". Feature2 " + symLoader.getFeatureName()
                                                + ", " + bioSeq.getId());
                                        ex.printStackTrace();
                                    }
                                }
                                // else {
                                // Count annotations anyway
                                /**
                                 * Finish to read the annotations
                                 */
                                // try {
                                // if (symLoader != null && bioSeq != null) {
                                // List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                // if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                // && symmetries != null && symmetries.size() > 0) {
                                // Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                // .iterator();
                                // while (annotationSymIterator.hasNext()) {
                                // SeqSymmetry seqSym = annotationSymIterator.next();
                                // if (seqSym.getSpanCount() == 0) {
                                // continue;
                                // }
                                // if (false == SymWithProps.class.isInstance(seqSym)) {
                                // continue;
                                // }
                                // SeqSpan span = seqSym.getSpan(0);
                                // String annotation = null;
                                // if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                // annotation = ((SymWithProps) seqSym).getProperty("name")
                                // .toString();
                                // } else {
                                // // find the first text annotation
                                // for (Object propertyValue : ((SymWithProps) seqSym)
                                // .getProperties().values()) {
                                // String s = propertyValue.toString();
                                // if (s.matches("[A-Za-z]")) {
                                // annotation = s;
                                // break;
                                // }
                                // }
                                // }
                                // if (null != annotation) {
                                // increase(annotationsCount, annotation);
                                // }
                                // }}
                                // /**
                                // * FINISHED READ ALL ANNOTATIONS
                                // */
                                // }
                                // } catch (Exception ex) {
                                // logger.severe(ex.getMessage() + ". Feature3 " + symLoader.getFeatureName()
                                // + ", " + bioSeq.getId());
                                // // ex.printStackTrace();
                                // }
                                // }
                            });
                        });
            });

    /**
     * Get pvalues
     */
    int populationSize = this.getNumberOfAnnotations();
    int sampleSize = this.getNumberOfAnnotationsMapped();
    for (String annotation : this.getAnnotations()) {
        int numberOfSuccesses = this.getNumberOfAnnotations(annotation);
        HypergeometricDistribution hd = new HypergeometricDistribution(populationSize, numberOfSuccesses,
                sampleSize);
        double probability = hd.upperCumulativeProbability(this.getNumberOfAnnotationsMapped(annotation));
        this.pValues.put(annotation, probability);
        logger.severe("pvalue: " + numberOfSuccesses + " | " + sampleSize + " | " + populationSize + " | "
                + this.getNumberOfAnnotationsMapped(annotation) + " = " + probability);
    }
    logger.info("Num annotations: " + this.getAnnotations().size() + " / " + annotationsCount.keySet().size());
}
From source file:org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils.java
/**
 * Computes a two-sided p-Value for a Fisher's exact test on the contingency table, after normalizing counts
 * so that the sum does not exceed {@value org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils#TARGET_TABLE_SIZE}
 * @param originalTable
 * @return
 */
public static Double FisherExactPValueForContingencyTable(int[][] originalTable) {
    final int[][] normalizedTable = normalizeContingencyTable(originalTable);
    int[][] table = copyContingencyTable(normalizedTable);

    int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
    int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
    int N = rowSums[0] + rowSums[1];
    int sampleSize = colSums[0];
    int numberOfNonSuccesses = rowSums[1];
    int numberOfSuccesses = rowSums[0];

    /**
     * The lowest possible number of successes you can sample is what's left of your sample size after
     * drawing every non success in the urn. If the number of non successes in the urn is greater than the sample
     * size then the minimum number of drawn successes is 0.
     */
    int lo = Math.max(0, sampleSize - numberOfNonSuccesses);

    /**
     * The highest possible number of successes you can draw is either the total sample size or the number of
     * successes in the urn. (Whichever is smaller)
     */
    int hi = Math.min(sampleSize, numberOfSuccesses);

    /**
     * If the support of the distribution is only one value, creating the HypergeometricDistribution
     * doesn't make sense. There would be only one possible observation so the p-value has to be 1.
     */
    if (lo == hi) {
        return 1.0;
    }

    /**
     * For the hypergeometric distribution from which to calculate the probabilities:
     * The population (N = a+b+c+d) is the sum of all four numbers in the contingency table. Then the number of
     * "successes" (K = a+b) is the sum of the top row, and the sample size (n = a+c) is the sum of the first column.
     */
    final HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);

    // Then we determine a given probability with the sampled successes (k = a) from the first entry in the table.
    double pCutoff = dist.probability(table[0][0]);
    double pValue = 0.0;

    /**
     * Now cycle through all possible k's and add those whose probabilities are smaller than our observed k
     * to the p-value, since this is a two-sided test
     */
    for (int i = lo; i <= hi; i++) {
        double pValuePiece = dist.probability(i);
        if (pValuePiece <= pCutoff) {
            pValue += pValuePiece;
        }
    }

    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(pValue, 1.0);
}
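As a worked illustration of the same approach (this example and its numbers are mine, not part of the GATK source, and it skips the table normalization step): for a hypothetical 2x2 table {{6, 2}, {1, 4}} we have N = 13, K = 8 successes (top row), n = 7 sampled (first column), and the observed count is the top-left cell, 6. A self-contained sketch that reproduces the two-sided summation:

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class FisherExactSketch {
    public static void main(String[] args) {
        // Hypothetical contingency table {{6, 2}, {1, 4}}.
        int[][] table = { { 6, 2 }, { 1, 4 } };
        int N = table[0][0] + table[0][1] + table[1][0] + table[1][1]; // 13
        int numberOfSuccesses = table[0][0] + table[0][1];             // 8 (top row)
        int sampleSize = table[0][0] + table[1][0];                    // 7 (first column)

        HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);

        // Two-sided test: sum the probabilities of all outcomes no more likely than the observed one.
        double pCutoff = dist.probability(table[0][0]);
        double pValue = 0.0;
        for (int k = dist.getSupportLowerBound(); k <= dist.getSupportUpperBound(); k++) {
            if (dist.probability(k) <= pCutoff) {
                pValue += dist.probability(k);
            }
        }
        System.out.println(Math.min(pValue, 1.0)); // about 0.103 for this table
    }
}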