List of usage examples for org.apache.commons.math3.distribution HypergeometricDistribution upperCumulativeProbability
public double upperCumulativeProbability(int x)
From source file:edu.sdsc.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) { List<AnalyzerResult> pValues = new ArrayList<>(); try (Transaction tx = graphDb.beginTx()) { AnalyzeRequest processedRequest = processRequest(request); Set<AnalyzerResult> sampleSetNodes = getSampleSetNodes(processedRequest); Set<AnalyzerResult> completeSetNodes = getCompleteSetNodes(processedRequest); int totalCount = getTotalCount(processedRequest.getOntologyClass()); // apply the HyperGeometricDistribution for each node for (AnalyzerResult n : sampleSetNodes) { HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount, (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size()); double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount()); pValues.add(new AnalyzerResult(n.getNodeId(), p)); }/*w w w . j a va2 s. c om*/ // sort by p-value Collections.sort(pValues, new AnalyzerResultComparator()); tx.success(); } catch (Exception e) { e.printStackTrace(); } return pValues; }
From source file:io.scigraph.analyzer.HyperGeometricAnalyzer.java
public List<AnalyzerResult> analyze(AnalyzeRequest request) { List<AnalyzerResult> pValues = new ArrayList<>(); try (Transaction tx = graphDb.beginTx()) { AnalyzeRequest processedRequest = processRequest(request); Set<AnalyzerInnerNode> sampleSetNodes = getSampleSetNodes(processedRequest); Set<AnalyzerInnerNode> completeSetNodes = getCompleteSetNodes(processedRequest); double bonferroniCoeff = computeBonferroniCoeff(completeSetNodes); int totalCount = getTotalCount(processedRequest.getOntologyClass()); // apply the HyperGeometricDistribution for each node for (AnalyzerInnerNode n : sampleSetNodes) { HypergeometricDistribution hypergeometricDistribution = new HypergeometricDistribution(totalCount, (int) getCountFrom(completeSetNodes, n.getNodeId()), processedRequest.getSamples().size()); double p = hypergeometricDistribution.upperCumulativeProbability((int) n.getCount()) * bonferroniCoeff;/*w w w . ja v a 2 s. c om*/ String iri = graph.getNodeProperty(n.getNodeId(), CommonProperties.IRI, String.class).get(); String curie = curieUtil.getCurie(iri).or(iri); String labels = StringUtils .join(graph.getNodeProperties(n.getNodeId(), NodeProperties.LABEL, String.class), ", "); pValues.add(new AnalyzerResult(labels, curie, p)); } // sort by p-value Collections.sort(pValues, new AnalyzerResultComparator()); tx.success(); } catch (Exception e) { e.printStackTrace(); } return pValues; }
From source file:it.iit.genomics.cru.simsearch.bundle.utils.AnnotationsFromTrack.java
public void process(String trackId, Collection<TopkResult> results) { // Chromosome n // for each annotation // for each result // for each region // if overlapp // --> add annotation // next annotation // next result. annotationsCount = new HashMap<>(); annotationsMappedCount = new HashMap<>(); // Order topkresults HashMap<String, ArrayList<Region>> orderedRegions = new HashMap<>(); logger.info("Order regions"); try {//from w ww. java 2 s. co m for (TopkResult result : results) { for (String datasetId : result.getPositiveMatchDatasetIds()) { if (result.getDataset(datasetId) == null) { continue; } if (result.getDataset(datasetId).getRegions() == null) { continue; } for (Region region : result.getDataset(datasetId).getRegions()) { if (region == null) { logger.info("null region"); continue; } if (region.getChromosome() == null) { logger.info("null chromosome"); continue; } ArrayList<Region> regions = orderedRegions.get(region.getChromosome()); if (regions == null) { regions = new ArrayList<Region>(); orderedRegions.put(region.getChromosome(), regions); } int i = 0; for (; i < regions.size(); i++) { if (regions.get(i).getLeft() > region.getLeft()) { break; } } regions.add(i, region); } } } } catch (Exception e) { logger.severe(e.getMessage()); e.printStackTrace(); } logger.info("Regions ordered"); Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion()) .ifPresent(selectedGenomeVersion -> { selectedGenomeVersion.getAvailableDataContainers().stream() .flatMap(dc -> dc.getDataSets().stream()).filter(dataSet -> dataSet.isVisible()) .filter(dataSet -> dataSet.getDataSetName().equals(trackId)) .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()), dataSet.getURI(), Optional.empty(), DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion)) .forEach(symLoader -> { selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> { String chromosome = bioSeq.getId(); if (chromosome.startsWith("chr") && false == chromosome.contains("_")) { logger.severe("chromosome: " + chromosome); } ArrayList<Region> regions = orderedRegions.get(chromosome); if (regions != null) { // Iterator<Region> regionIt = regions.iterator(); // Region currentResult = regionIt.next(); // SeqSpan span = null; try { List<? extends SeqSymmetry> symmetries = symLoader .getChromosome(bioSeq); if (chromosome.startsWith("chr") && false == chromosome.contains("_") && symmetries != null && symmetries.size() > 0) { Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries .iterator(); while (annotationSymIterator.hasNext()) { String annotation = null; SeqSymmetry seqSym = annotationSymIterator.next(); // span = seqSym.getSpan(0); if (null != ((SymWithProps) seqSym).getProperty("name")) { annotation = ((SymWithProps) seqSym).getProperty("name") .toString(); } else { // find the first text annotation for (Object propertyValue : ((SymWithProps) seqSym) .getProperties().values()) { String s = propertyValue.toString(); if (s.matches("[A-Za-z]")) { annotation = s; break; } } } if (null != annotation) { increase(annotationsCount, annotation); } } } } catch (Exception e) { logger.severe("pre-load annotations: " + bioSeq.getId()); } } }); }); }); for (String annotation : annotationsCount.keySet()) { logger.severe("Annotation: " + annotation + " : " + annotationsCount.get(annotation)); } Optional.ofNullable(GenometryModel.getInstance().getSelectedGenomeVersion()) .ifPresent(selectedGenomeVersion -> { selectedGenomeVersion.getAvailableDataContainers().stream() .flatMap(dc -> dc.getDataSets().stream()).filter(dataSet -> dataSet.isVisible()) .filter(dataSet -> dataSet.getDataSetName().equals(trackId)) .map(dataSet -> ServerUtils.determineLoader(SymLoader.getExtension(dataSet.getURI()), dataSet.getURI(), Optional.empty(), DataSet.detemineFriendlyName(dataSet.getURI()), selectedGenomeVersion)) .forEach(symLoader -> { selectedGenomeVersion.getSeqList().stream().forEach(bioSeq -> { String chromosome = bioSeq.getId(); if (chromosome.startsWith("chr") && false == chromosome.contains("_")) { logger.severe("chromosome: " + chromosome); } ArrayList<Region> regions = orderedRegions.get(chromosome); if (regions != null) { Iterator<Region> regionIt = regions.iterator(); Region currentResult = regionIt.next(); SeqSpan span = null; String annotation = null; try { List<? extends SeqSymmetry> symmetries = symLoader .getChromosome(bioSeq); if (chromosome.startsWith("chr") && false == chromosome.contains("_") && symmetries != null && symmetries.size() > 0) { Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries .iterator(); // logger.info("Iterator // ready"); boolean nextResult = false; boolean nextAnnotation = true; while (nextResult || nextAnnotation) { if (nextAnnotation) { // if (false == annotationSymIterator.hasNext()) { // /** // * Finish to read the matchings // */ // break; // } SeqSymmetry seqSym = annotationSymIterator.next(); if (seqSym.getSpanCount() == 0) { continue; } if (false == SymWithProps.class.isInstance(seqSym)) { continue; } span = seqSym.getSpan(0); if (null != ((SymWithProps) seqSym).getProperty("name")) { annotation = ((SymWithProps) seqSym).getProperty("name") .toString(); } else { // find the first text annotation for (Object propertyValue : ((SymWithProps) seqSym) .getProperties().values()) { String s = propertyValue.toString(); if (s.matches("[A-Za-z]")) { annotation = s; break; } } } // if (null != annotation) { // increase(annotationsCount, annotation); // } if (span.getMax() < currentResult.getLeft()) { continue; } nextAnnotation = false; } if (nextResult) { if (false == regionIt.hasNext()) { break; } currentResult = regionIt.next(); if (currentResult.getRight() < span.getMin()) { continue; } nextResult = false; } if (overlap(span, currentResult)) { increase(annotationsMappedCount, annotation); matchingMappedByAnnotationType.put(annotation, currentResult.toString()); // next annotation nextAnnotation = true; nextResult = false; } else if (currentResult.getRight() < span.getMin()) { nextResult = true; nextAnnotation = false; } else { nextResult = false; nextAnnotation = true; // nextResult = // true; } } } } catch (Exception ex) { logger.severe(ex.getMessage() + ". Feature2 " + symLoader.getFeatureName() + ", " + bioSeq.getId()); ex.printStackTrace(); } } // else { // GCount annotations anyway /** * Finish to read the annotations */ // try{ // if (symLoader != null && bioSeq != null) { // List<? extends SeqSymmetry> symmetries = symLoader.getChromosome(bioSeq); // if (chromosome.startsWith("chr") && false == chromosome.contains("_") // && symmetries != null && symmetries.size() > 0) { // Iterator<? extends SeqSymmetry> annotationSymIterator = symmetries // .iterator(); // while (annotationSymIterator.hasNext()) { // SeqSymmetry seqSym = annotationSymIterator.next(); // if (seqSym.getSpanCount() == 0) { // continue; // } // if (false == SymWithProps.class.isInstance(seqSym)) { // continue; // } // SeqSpan span = seqSym.getSpan(0); // String annotation = null; // if (null != ((SymWithProps) seqSym).getProperty("name")) { // annotation = ((SymWithProps) seqSym).getProperty("name") // .toString(); // } else { // // find the first text annotation // for (Object propertyValue : ((SymWithProps) seqSym) // .getProperties().values()) { // String s = propertyValue.toString(); // if (s.matches("[A-Za-z]")) { // annotation = s; // break; // } // } // } // if (null != annotation) { // increase(annotationsCount, annotation); // } // }} // /** // * FINISHED READ ALL ANNOTATIONS // */ // } // } catch (Exception ex) { // logger.severe(ex.getMessage() + ". Feature3 " + symLoader.getFeatureName() // + ", " + bioSeq.getId()); // // ex.printStackTrace(); // } // } }); }); }); /** * Get pvalues */ int populationSize = this.getNumberOfAnnotations(); int sampleSize = this.getNumberOfAnnotationsMapped(); for (String annotation : this.getAnnotations()) { int numberOfSuccesses = this.getNumberOfAnnotations(annotation); HypergeometricDistribution hd = new HypergeometricDistribution(populationSize, numberOfSuccesses, sampleSize); double probability = hd.upperCumulativeProbability(this.getNumberOfAnnotationsMapped(annotation)); this.pValues.put(annotation, probability); logger.severe("pvalue: " + numberOfSuccesses + " | " + sampleSize + " | " + populationSize + " | " + this.getNumberOfAnnotationsMapped(annotation) + " = " + probability); } logger.info("Num annotations: " + this.getAnnotations().size() + " / " + annotationsCount.keySet().size()); }
From source file:org.intermine.web.logic.widget.EnrichmentCalculation.java
/** * Perform an enrichment calculation based on input from some source and return results that * include a p-value and count for each attribute that was observed in the sample. Also perform * the type multiple hypothesis error correction specified before returning the results. * @param input details of the sample and population * @param maxValue the maximum p-value to return, for display purposes * @param errorCorrection the type of error correction to perform or None * @param extraCorrectionCoefficient if true correction coefficient has been selected * @param correctionCoefficient a instance of correction coefficient * @return results of the enrichment calculation *//*w ww. j a va 2 s .c o m*/ public static EnrichmentResults calculate(EnrichmentInput input, Double maxValue, String errorCorrection, boolean extraCorrectionCoefficient, CorrectionCoefficient correctionCoefficient) { int sampleSize = input.getSampleSize(); PopulationInfo population = input.getPopulationInfo(); int populationSize = population.getSize(); Map<String, Integer> sampleCounts = input.getAnnotatedCountsInSample(); Map<String, PopulationInfo> annotatedPopulationInfo = input.getAnnotatedCountsInPopulation(); Map<String, BigDecimal> rawResults = new HashMap<String, BigDecimal>(); for (Map.Entry<String, Integer> entry : sampleCounts.entrySet()) { String attribute = entry.getKey(); Integer sampleCount = entry.getValue(); PopulationInfo pi = annotatedPopulationInfo.get(attribute); Integer populationCount = (pi != null) ? pi.getSize() : 0; HypergeometricDistribution h = new HypergeometricDistribution(populationSize, populationCount, sampleSize); Double pValue = h.upperCumulativeProbability(sampleCount); rawResults.put(attribute, new BigDecimal(pValue)); } Map<String, BigDecimal> correctedResults = ErrorCorrection.adjustPValues(errorCorrection, rawResults, maxValue, input.getTestCount()); if (extraCorrectionCoefficient && correctionCoefficient.isApplicable()) { correctionCoefficient.apply(correctedResults, population, annotatedPopulationInfo, maxValue); } Map<String, BigDecimal> sortedCorrectedResults = ErrorCorrection.sortMap(correctedResults); // record the number of items in the sample that had any values for the attribute int widgetTotal = rawResults.isEmpty() ? 0 : sampleSize; EnrichmentResults results = new EnrichmentResults(sortedCorrectedResults, input.getAnnotatedCountsInSample(), input.getLabels(), widgetTotal); return results; }
From source file:ubc.pavlab.gotrack.beans.EnrichmentView.java
private void enrichmentAnalysis(Map<Edition, Map<Gene, Set<GeneOntologyTerm>>> geneGOMap, Map<Edition, Set<GeneOntologyTerm>> allTerms, int minAnnotedPopulation) { enrichmentData = new HashMap<>(); boolean emptyChart = true; List<Edition> eds = new ArrayList<Edition>(geneGOMap.keySet()); Collections.sort(eds, Collections.reverseOrder()); boolean firstRun = true; Set<GeneOntologyTerm> termsToEnrich = new HashSet<>(); for (Edition ed : eds) { Map<Gene, Set<GeneOntologyTerm>> data = geneGOMap.get(ed); int populationSize = cache.getGenePopulation(currentSpeciesId, ed); int k = data.keySet().size(); Set<GeneOntologyTerm> termsInEdition = allTerms.get(ed); // // TODO Multiple tests correction lowered because we're testing less terms? // if ( !firstRun ) { // termsInEdition = new HashSet<>( termsInEdition ); // termsInEdition.retainAll( termsToEnrich ); // }//from ww w.ja v a 2 s . c o m Map<GeneOntologyTerm, Double> enrich = enrichmentData.get(ed); if (enrich == null) { enrich = new HashMap<>(); enrichmentData.put(ed, enrich); } if (termsInEdition != null) { int bcorr = termsInEdition.size(); for (GeneOntologyTerm term : termsInEdition) { if (!firstRun && !termsToEnrich.contains(term)) { // Term is part of this edition however did not pass the threshold in the current edition continue; } Integer populationAnnotated = cache.getGoSetSizes(currentSpeciesId, ed.getEdition(), term.getGoId()); if (populationAnnotated != null && populationAnnotated >= minAnnotedPopulation) { HypergeometricDistribution hyper = new HypergeometricDistribution(populationSize, populationAnnotated, k); int r = 0; for (Entry<Gene, Set<GeneOntologyTerm>> geneEntry : data.entrySet()) { if (geneEntry.getValue().contains(term)) { r++; } } // log.info( Arrays.asList( populationSize, populationAnnotated, k, r ) ); Double res = hyper.upperCumulativeProbability(r); if (bonferroniCorrection) { res = bcorr * res; } res = Math.min(res, 1); if (!firstRun || res <= pThreshold) { enrich.put(term, res); emptyChart = false; if (firstRun) { termsToEnrich.add(term); } } else { // log.info( term + " skipped; p-value not in range" ); } // log.info( "Edition: (" + ed.getEdition() + ") " + hyper.upperCumulativeProbability( r ) ); } else { // log.info( term + " skipped; too small or isn't annotated" ); } } } firstRun = false; } if (!emptyChart) { enrichmentChart = createChart(enrichmentData, "Enrichment Stability", "Date", "p-value"); chartReady = true; log.info("Chart created"); } else { chartReady = false; log.info("Chart Empty"); } }