List of usage examples for org.apache.commons.math3.distribution.HypergeometricDistribution
public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize) throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException
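Before the source-file examples, here is a minimal, self-contained sketch of how the constructor is typically used; the parameter values are made up for illustration (an urn of 50 items, 10 of which are "successes", from which 5 are drawn without replacement):

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class HypergeometricExample {
    public static void main(String[] args) {
        // Hypothetical urn: population of 50, 10 successes, sample of 5 drawn without replacement.
        HypergeometricDistribution dist = new HypergeometricDistribution(50, 10, 5);

        System.out.println(dist.probability(2));                // P(X = 2)
        System.out.println(dist.cumulativeProbability(2));      // P(X <= 2)
        System.out.println(dist.upperCumulativeProbability(2)); // P(X >= 2)
        System.out.println(dist.getNumericalMean());            // n * K / N = 5 * 10 / 50 = 1.0
    }
}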
From source file:jasima.core.random.discrete.IntHypergeometric.java
public IntHypergeometric(int populationSize, int numberOfSuccesses, int sampleSize) {
    super();
    setDistribution(new HypergeometricDistribution(populationSize, numberOfSuccesses, sampleSize));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The total population size.
 *
 * @param populationSize
 *            The value for the population size.
 * @throws NotStrictlyPositiveException
 *             If the supplied value was {@code <=0}.
 */
public void setPopulationSize(int populationSize) throws NotStrictlyPositiveException {
    setDistribution(
            new HypergeometricDistribution(populationSize, dist.getNumberOfSuccesses(), dist.getSampleSize()));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The number of possible successes in the population.
 *
 * @param numberOfSuccesses
 *            The number of successes.
 * @throws NumberIsTooLargeException
 *             If the supplied value is larger than {@code populationSize}.
 */
public void setNumberOfSuccesses(int numberOfSuccesses) throws NumberIsTooLargeException {
    setDistribution(
            new HypergeometricDistribution(dist.getPopulationSize(), numberOfSuccesses, dist.getSampleSize()));
}
From source file:jasima.core.random.discrete.IntHypergeometric.java
/**
 * The number of samples taken.
 *
 * @param sampleSize
 *            The number of trials conducted.
 * @throws NumberIsTooLargeException
 *             If the supplied value is larger than {@code populationSize}.
 */
public void setSampleSize(int sampleSize) throws NumberIsTooLargeException {
    setDistribution(
            new HypergeometricDistribution(dist.getPopulationSize(), dist.getNumberOfSuccesses(), sampleSize));
}
From source file:edu.sdsc.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);
        Set<AnalyzerResult> sampleSetNodes = getSampleSetNodes(processedRequest);
        Set<AnalyzerResult> completeSetNodes = getCompleteSetNodes(processedRequest);
        int totalCount = getTotalCount(processedRequest.getOntologyClass());
        // apply the HypergeometricDistribution for each node
        for (AnalyzerResult n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount());
            pValues.add(new AnalyzerResult(n.getNodeId(), p));
        }
        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());
        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}
From source file:io.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) {
    List<AnalyzerResult> pValues = new ArrayList<>();
    try (Transaction tx = graphDb.beginTx()) {
        AnalyzeRequest processedRequest = processRequest(request);
        Set<AnalyzerInnerNode> sampleSetNodes = getSampleSetNodes(processedRequest);
        Set<AnalyzerInnerNode> completeSetNodes = getCompleteSetNodes(processedRequest);
        double bonferroniCoeff = computeBonferroniCoeff(completeSetNodes);
        int totalCount = getTotalCount(processedRequest.getOntologyClass());
        // apply the HypergeometricDistribution for each node
        for (AnalyzerInnerNode n : sampleSetNodes) {
            HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount,
                    (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size());
            double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount()) * bonferroniCoeff;
            String iri = graph.getNodeProperty(n.getNodeId(), CommonProperties.IRI, String.class).get();
            String curie = curieUtil.getCurie(iri).or(iri);
            String labels = StringUtils
                    .join(graph.getNodeProperties(n.getNodeId(), NodeProperties.LABEL, String.class), ", ");
            pValues.add(new AnalyzerResult(labels, curie, p));
        }
        // sort by p-value
        Collections.sort(pValues, new AnalyzerResultComparator());
        tx.success();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return pValues;
}
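Both analyze() variants reduce to the same core step: upperCumulativeProbability(k) returns P(X >= k), the probability of observing at least the sampled count under the hypergeometric null; the io.scigraph version additionally multiplies by a Bonferroni coefficient before reporting. A minimal sketch of that step in isolation, with made-up counts (the specific numbers are illustrative, not from either project):

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class EnrichmentSketch {
    public static void main(String[] args) {
        // Hypothetical counts: 1000 annotated nodes in total, 40 carry the term of interest,
        // 25 nodes in the sample set, 5 of which carry the term.
        HypergeometricDistribution dist = new HypergeometricDistribution(1000, 40, 25);
        double p = dist.upperCumulativeProbability(5); // P(X >= 5) under the null
        double bonferroniCoeff = 200.0;                // assumed number of tests, as in the io.scigraph variant
        double adjusted = p * bonferroniCoeff;         // the source multiplies the raw p-value by the coefficient
        System.out.println(p + " -> " + adjusted);
    }
}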
From source file:ch.unil.genescore.pathway.GeneSetLibrary.java
/**
 * Compute enrichment for the given set using hypergeometric distribution (magenta-option).
 * quantile refers to the cutoff used to assign ins and outs.
 */
protected double[] computeHypGeomPvalue2(double[] set, double[] totSet) {
    ArrayList<Double> quantiles = Pascal.set.hypGeomQuantiles_;
    double[] pvals = new double[quantiles.size()];
    if (set.length == 0) {
        for (int i = 0; i < quantiles.size(); i++) {
            pvals[i] = 1;
        }
        return pvals;
    }
    for (int j = 0; j < quantiles.size(); j++) {
        double[] valAr = totSet.clone();
        Arrays.sort(valAr);
        int n = (int) Math.floor(valAr.length * quantiles.get(j));
        double cutoffVal = valAr[n];
        int setSize = valAr.length - n - 1;
        int q = 0;
        for (double g : set) {
            if (g > cutoffVal)
                q += 1;
        }
        HypergeometricDistribution myHypgeom = new HypergeometricDistribution(valAr.length, setSize, set.length);
        double pval = 1 - myHypgeom.cumulativeProbability(q - 1);
        pvals[j] = pval;
    }
    return pvals;
}
From source file:ch.unil.genescore.pathway.GeneSetLibrary.java
protected void computeHypGeomPvalue(GeneSet set) {
    ArrayList<Double> quantiles = Pascal.set.hypGeomQuantiles_;
    double[] pvals = new double[quantiles.size()];
    if (set.getGenes().size() == 0) {
        for (int i = 0; i < quantiles.size(); i++) {
            pvals[i] = 1;
        }
        set.setHypGeomPvalues(pvals);
        return;
    }
    for (int j = 0; j < quantiles.size(); j++) {
        double[] valAr = new double[genesForSimulation_.size()];
        for (int i = 0; i < genesForSimulation_.size(); i++) {
            valAr[i] = genesForSimulation_.get(i).getChi2Stat();
        }
        Arrays.sort(valAr);
        int n = (int) Math.floor(valAr.length * quantiles.get(j));
        double cutoffVal = valAr[n];
        int setSize = valAr.length - n - 1;
        HashSet<Gene> pathwayGenes = set.getGenes();
        if (pathwayGenes.size() == 0)
            return;
        int q = 0;
        for (Gene g : pathwayGenes) {
            if (g.getChi2Stat() > cutoffVal)
                q += 1;
        }
        HypergeometricDistribution myHypgeom = new HypergeometricDistribution(valAr.length, setSize,
                pathwayGenes.size());
        double pval = 1 - myHypgeom.cumulativeProbability(q - 1);
        pvals[j] = pval;
    }
    set.setHypGeomPvalues(pvals);
}
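Both GeneSetLibrary methods report the upper tail of the same hypergeometric distribution. Restating the code above (no new assumptions beyond renaming): with N = valAr.length scored genes in total, K = setSize genes above the quantile cutoff, n genes in the tested set, and q of those observed above the cutoff, the reported value is

    p = P(X >= q) = 1 - F(q - 1) = \sum_{k=q}^{\min(K, n)} \binom{K}{k} \binom{N-K}{n-k} / \binom{N}{n}

so the expression 1 - myHypgeom.cumulativeProbability(q - 1) is mathematically equivalent to myHypgeom.upperCumulativeProbability(q).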
From source file:it.iit.genomics.cru.simsearch.bundle.utils.AnnotationsFromTrack.java
public void process(String trackId, Collection<TopkResult> results) {
    // Chromosome n
    // for each annotation
    // for each result
    // for each region
    // if overlap
    // --> add annotation
    // next annotation
    // next result.
    annotationsCount = new HashMap<>();
    annotationsMappedCount = new HashMap<>();

    // Order topkresults
    HashMap<String, ArrayList<Region>> orderedRegions = new HashMap<>();
    logger.info("Order regions");
    try {
        for (TopkResult result : results) {
            for (String datasetId : result.getPositiveMatchDatasetIds()) {
                if (result.getDataset(datasetId) == null) {
                    continue;
                }
                if (result.getDataset(datasetId).getRegions() == null) {
                    continue;
                }
                for (Region region : result.getDataset(datasetId).getRegions()) {
                    if (region == null) {
                        logger.info("null region");
                        continue;
                    }
                    if (region.getChromosome() == null) {
                        logger.info("null chromosome");
                        continue;
                    }
                    ArrayList<Region> regions = orderedRegions.get(region.getChromosome());
                    if (regions == null) {
                        regions = new ArrayList<Region>();
                        orderedRegions.put(region.getChromosome(), regions);
                    }
                    int i = 0;
                    for (; i < regions.size(); i++) {
                        if (regions.get(i).getLeft() > region.getLeft()) {
                            break;
                        }
                    }
                    regions.add(i, region);
                }
            }
        }
    } catch (Exception e) {
        logger.severe(e.getMessage());
        e.printStackTrace();
    }
    logger.info("Regions ordered");

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream())
                        .filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {
                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);
                                if (regions != null) {
                                    // Iterator<Region> regionIt = regions.iterator();
                                    // Region currentResult = regionIt.next();
                                    // SeqSpan span = null;
                                    try {
                                        List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {
                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();
                                            while (annotationSymIterator.hasNext()) {
                                                String annotation = null;
                                                SeqSymmetry seqSym = annotationSymIterator.next();
                                                // span = seqSym.getSpan(0);
                                                if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                    annotation = ((SymWithProps) seqSym).getProperty("name")
                                                            .toString();
                                                } else {
                                                    // find the first text annotation
                                                    for (Object propertyValue : ((SymWithProps) seqSym)
                                                            .getProperties().values()) {
                                                        String s = propertyValue.toString();
                                                        if (s.matches("[A-Za-z]")) {
                                                            annotation = s;
                                                            break;
                                                        }
                                                    }
                                                }
                                                if (null != annotation) {
                                                    increase(annotationsCount, annotation);
                                                }
                                            }
                                        }
                                    } catch (Exception e) {
                                        logger.severe("pre-load annotations: " + bioSeq.getId());
                                    }
                                }
                            });
                        });
            });

    for (String annotation : annotationsCount.keySet()) {
        logger.severe("Annotation: " + annotation + " : " + annotationsCount.get(annotation));
    }

    Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion())
            .ifPresent(selectedGenomeVersion -> {
                selectedGenomeVersion.getAvailableDataContainers().stream()
                        .flatMap(dc -> dc.getDataSets().stream())
                        .filter(dataSet -> dataSet.isVisible())
                        .filter(dataSet -> dataSet.getDataSetName().equals(trackId))
                        .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()),
                                dataSet.getURI(), Optional.empty(),
                                DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion))
                        .forEach(symLoader -> {
                            selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> {
                                String chromosome = bioSeq.getId();
                                if (chromosome.startsWith("chr") && false == chromosome.contains("_")) {
                                    logger.severe("chromosome: " + chromosome);
                                }
                                ArrayList<Region> regions = orderedRegions.get(chromosome);
                                if (regions != null) {
                                    Iterator<Region> regionIt = regions.iterator();
                                    Region currentResult = regionIt.next();
                                    SeqSpan span = null;
                                    String annotation = null;
                                    try {
                                        List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                        if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                                && symmetries != null && symmetries.size() > 0) {
                                            Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                                    .iterator();
                                            // logger.info("Iterator ready");
                                            boolean nextResult = false;
                                            boolean nextAnnotation = true;
                                            while (nextResult || nextAnnotation) {
                                                if (nextAnnotation) {
                                                    // if (false == annotationSymIterator.hasNext()) {
                                                    //     /**
                                                    //      * Finish to read the matchings
                                                    //      */
                                                    //     break;
                                                    // }
                                                    SeqSymmetry seqSym = annotationSymIterator.next();
                                                    if (seqSym.getSpanCount() == 0) {
                                                        continue;
                                                    }
                                                    if (false == SymWithProps.class.isInstance(seqSym)) {
                                                        continue;
                                                    }
                                                    span = seqSym.getSpan(0);
                                                    if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                                        annotation = ((SymWithProps) seqSym).getProperty("name")
                                                                .toString();
                                                    } else {
                                                        // find the first text annotation
                                                        for (Object propertyValue : ((SymWithProps) seqSym)
                                                                .getProperties().values()) {
                                                            String s = propertyValue.toString();
                                                            if (s.matches("[A-Za-z]")) {
                                                                annotation = s;
                                                                break;
                                                            }
                                                        }
                                                    }
                                                    // if (null != annotation) {
                                                    //     increase(annotationsCount, annotation);
                                                    // }
                                                    if (span.getMax() < currentResult.getLeft()) {
                                                        continue;
                                                    }
                                                    nextAnnotation = false;
                                                }
                                                if (nextResult) {
                                                    if (false == regionIt.hasNext()) {
                                                        break;
                                                    }
                                                    currentResult = regionIt.next();
                                                    if (currentResult.getRight() < span.getMin()) {
                                                        continue;
                                                    }
                                                    nextResult = false;
                                                }
                                                if (overlap(span, currentResult)) {
                                                    increase(annotationsMappedCount, annotation);
                                                    matchingMappedByAnnotationType.put(annotation,
                                                            currentResult.toString());
                                                    // next annotation
                                                    nextAnnotation = true;
                                                    nextResult = false;
                                                } else if (currentResult.getRight() < span.getMin()) {
                                                    nextResult = true;
                                                    nextAnnotation = false;
                                                } else {
                                                    nextResult = false;
                                                    nextAnnotation = true;
                                                    // nextResult = true;
                                                }
                                            }
                                        }
                                    } catch (Exception ex) {
                                        logger.severe(ex.getMessage() + ". Feature2 " + symLoader.getFeatureName()
                                                + ", " + bioSeq.getId());
                                        ex.printStackTrace();
                                    }
                                }
                                // else {
                                // Count annotations anyway
                                /**
                                 * Finish to read the annotations
                                 */
                                // try {
                                // if (symLoader != null && bioSeq != null) {
                                // List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq);
                                // if (chromosome.startsWith("chr") && false == chromosome.contains("_")
                                // && symmetries != null && symmetries.size() > 0) {
                                // Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries
                                // .iterator();
                                // while (annotationSymIterator.hasNext()) {
                                // SeqSymmetry seqSym = annotationSymIterator.next();
                                // if (seqSym.getSpanCount() == 0) {
                                // continue;
                                // }
                                // if (false == SymWithProps.class.isInstance(seqSym)) {
                                // continue;
                                // }
                                // SeqSpan span = seqSym.getSpan(0);
                                // String annotation = null;
                                // if (null != ((SymWithProps) seqSym).getProperty("name")) {
                                // annotation = ((SymWithProps) seqSym).getProperty("name")
                                // .toString();
                                // } else {
                                // // find the first text annotation
                                // for (Object propertyValue : ((SymWithProps) seqSym)
                                // .getProperties().values()) {
                                // String s = propertyValue.toString();
                                // if (s.matches("[A-Za-z]")) {
                                // annotation = s;
                                // break;
                                // }
                                // }
                                // }
                                // if (null != annotation) {
                                // increase(annotationsCount, annotation);
                                // }
                                // }}
                                // /**
                                // * FINISHED READ ALL ANNOTATIONS
                                // */
                                // }
                                // } catch (Exception ex) {
                                // logger.severe(ex.getMessage() + ". Feature3 " + symLoader.getFeatureName()
                                // + ", " + bioSeq.getId());
                                // // ex.printStackTrace();
                                // }
                                // }
                            });
                        });
            });

    /**
     * Get pvalues
     */
    int populationSize = this.getNumberOfAnnotations();
    int sampleSize = this.getNumberOfAnnotationsMapped();
    for (String annotation : this.getAnnotations()) {
        int numberOfSuccesses = this.getNumberOfAnnotations(annotation);
        HypergeometricDistribution hd = new HypergeometricDistribution(populationSize, numberOfSuccesses,
                sampleSize);
        double probability = hd.upperCumulativeProbability(this.getNumberOfAnnotationsMapped(annotation));
        this.pValues.put(annotation, probability);
        logger.severe("pvalue: " + numberOfSuccesses + " | " + sampleSize + " | " + populationSize + " | "
                + this.getNumberOfAnnotationsMapped(annotation) + " = " + probability);
    }
    logger.info("Num annotations: " + this.getAnnotations().size() + " / " + annotationsCount.keySet().size());
}
From source file:org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils.java
/**
 * Computes a two-sided p-Value for a Fisher's exact test on the contingency table, after normalizing counts
 * so that the sum does not exceed {@value org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils#TARGET_TABLE_SIZE}
 * @param originalTable
 * @return
 */
public static Double FisherExactPValueForContingencyTable(int[][] originalTable) {
    final int[][] normalizedTable = normalizeContingencyTable(originalTable);
    int[][] table = copyContingencyTable(normalizedTable);

    int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
    int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
    int N = rowSums[0] + rowSums[1];
    int sampleSize = colSums[0];
    int numberOfNonSuccesses = rowSums[1];
    int numberOfSuccesses = rowSums[0];

    /**
     * The lowest possible number of successes you can sample is what's left of your sample size after
     * drawing every non success in the urn. If the number of non successes in the urn is greater than the sample
     * size then the minimum number of drawn successes is 0.
     */
    int lo = Math.max(0, sampleSize - numberOfNonSuccesses);

    /**
     * The highest possible number of successes you can draw is either the total sample size or the number of
     * successes in the urn. (Whichever is smaller)
     */
    int hi = Math.min(sampleSize, numberOfSuccesses);

    /**
     * If the support of the distribution is only one value, creating the HypergeometricDistribution
     * doesn't make sense. There would be only one possible observation so the p-value has to be 1.
     */
    if (lo == hi) {
        return 1.0;
    }

    /**
     * For the hypergeometric distribution from which to calculate the probabilities:
     * The population (N = a+b+c+d) is the sum of all four numbers in the contingency table. Then the number of
     * "successes" (K = a+b) is the sum of the top row, and the sample size (n = a+c) is the sum of the first column.
     */
    final HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);

    // Then we determine a given probability with the sampled successes (k = a) from the first entry in the table.
    double pCutoff = dist.probability(table[0][0]);
    double pValue = 0.0;

    /**
     * Now cycle through all possible k's and add those whose probabilities are smaller than our observed k
     * to the p-value, since this is a two-sided test
     */
    for (int i = lo; i <= hi; i++) {
        double pValuePiece = dist.probability(i);
        if (pValuePiece <= pCutoff) {
            pValue += pValuePiece;
        }
    }

    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(pValue, 1.0);
}
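As a worked illustration of the same approach (this example and its numbers are mine, not part of the GATK source, and it skips the table normalization step): for a hypothetical 2x2 table {{6, 2}, {1, 4}} we have N = 13, K = 8 successes (top row), n = 7 sampled (first column), and the observed count is the top-left cell, 6. A self-contained sketch that reproduces the two-sided summation:

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class FisherExactSketch {
    public static void main(String[] args) {
        // Hypothetical contingency table {{6, 2}, {1, 4}}.
        int[][] table = { { 6, 2 }, { 1, 4 } };
        int N = table[0][0] + table[0][1] + table[1][0] + table[1][1]; // 13
        int numberOfSuccesses = table[0][0] + table[0][1];             // 8 (top row)
        int sampleSize = table[0][0] + table[1][0];                    // 7 (first column)

        HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);

        // Two-sided test: sum the probabilities of all outcomes no more likely than the observed one.
        double pCutoff = dist.probability(table[0][0]);
        double pValue = 0.0;
        for (int k = dist.getSupportLowerBound(); k <= dist.getSupportUpperBound(); k++) {
            if (dist.probability(k) <= pCutoff) {
                pValue += dist.probability(k);
            }
        }
        System.out.println(Math.min(pValue, 1.0)); // about 0.103 for this table
    }
}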