List of usage examples for org.apache.commons.math3.stat.inference AlternativeHypothesis GREATER_THAN
AlternativeHypothesis GREATER_THAN
To view the source code for org.apache.commons.math3.stat.inference AlternativeHypothesis GREATER_THAN.
Click Source Link
From source file:cn.edu.pku.cbi.mosaichunter.filter.CompleteLinkageFilter.java
private boolean doFilter(Site site, SAMRecord[] reads) { String chrName = site.getRefName(); int minReadQuality = ConfigManager.getInstance().getInt(null, "min_read_quality", 0); int minMappingQuality = ConfigManager.getInstance().getInt(null, "min_mapping_quality", 0); // find out all related positions Map<Integer, PositionEntry> positions = new HashMap<Integer, PositionEntry>(); for (int i = 0; i < site.getDepth(); ++i) { if (reads[i] == null || !chrName.equals(reads[i].getReferenceName())) { continue; }//from w w w . j av a2 s.c o m byte base = site.getBases()[i]; if (base != site.getMajorAllele() && base != site.getMinorAllele()) { continue; } boolean isMajor = base == site.getMajorAllele(); for (int j = 0; j < reads[i].getReadLength(); ++j) { if (reads[i].getBaseQualities()[j] < minReadQuality) { continue; } if (reads[i].getMappingQuality() < minMappingQuality) { continue; } int id = MosaicHunterHelper.BASE_TO_ID[reads[i].getReadBases()[j]]; if (id < 0) { continue; } int pos = reads[i].getReferencePositionAtReadPosition(j + 1); if (pos == site.getRefPos()) { continue; } PositionEntry entry = positions.get(pos); if (entry == null) { entry = new PositionEntry(); positions.put(pos, entry); } entry.count[id]++; if (isMajor) { entry.majorCount[id]++; } else { entry.minorCount[id]++; } } } // for each position for (Integer pos : positions.keySet()) { PositionEntry entry = positions.get(pos); int[] ids = MosaicHunterHelper.sortAlleleCount(entry.count); int majorId = ids[0]; int minorId = ids[1]; int diagonalSum1 = entry.majorCount[majorId] + entry.minorCount[minorId]; int diagonalSum2 = entry.majorCount[minorId] + entry.minorCount[majorId]; int totalSum = diagonalSum1 + diagonalSum2; int smallDiagonalSum = diagonalSum1; if (diagonalSum2 < diagonalSum1) { smallDiagonalSum = diagonalSum2; } if (new BinomialTest().binomialTest(totalSum, smallDiagonalSum, this.binomErrorRate, AlternativeHypothesis.GREATER_THAN) < binomPValueCutoff) { continue; } double p = FishersExactTest.twoSided(entry.majorCount[majorId], entry.majorCount[minorId], entry.minorCount[majorId], entry.minorCount[minorId]); if (p < fisherPValueCutoff) { char major1 = (char) site.getMajorAllele(); char minor1 = (char) site.getMinorAllele(); char major2 = (char) MosaicHunterHelper.ID_TO_BASE[majorId]; char minor2 = (char) MosaicHunterHelper.ID_TO_BASE[minorId]; site.setMetadata(getName(), new Object[] { pos, "" + major1 + major2 + ":" + entry.majorCount[majorId], "" + major1 + minor2 + ":" + entry.majorCount[minorId], "" + minor1 + major2 + ":" + entry.minorCount[majorId], "" + minor1 + minor2 + ":" + entry.minorCount[minorId], p }); return false; } } return true; }
From source file:uk.ac.babraham.SeqMonk.Filters.BinomialFilterForRev.java
protected void generateProbeList() { boolean aboveOnly = false; boolean belowOnly = false; if (options.directionBox.getSelectedItem().equals("Above")) aboveOnly = true;//from w ww . j a v a 2 s. c om else if (options.directionBox.getSelectedItem().equals("Below")) belowOnly = true; if (options.stringencyField.getText().length() == 0) { stringency = 0.05; } else { stringency = Double.parseDouble(options.stringencyField.getText()); } if (options.minObservationsField.getText().length() == 0) { minObservations = 10; } else { minObservations = Integer.parseInt(options.minObservationsField.getText()); } if (options.minDifferenceField.getText().length() == 0) { minPercentShift = 10; } else { minPercentShift = Integer.parseInt(options.minDifferenceField.getText()); } applyMultipleTestingCorrection = options.multiTestBox.isSelected(); ProbeList newList; if (applyMultipleTestingCorrection) { newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value"); } else { newList = new ProbeList(startingList, "Filtered Probes", "", "P-value"); } Probe[] probes = startingList.getAllProbes(); // We need to create a set of mean end methylation values for all starting values // We found to the nearest percent so we'll end up with a set of 101 values (0-100) // which are the expected end points double[] expectedEnds = calculateEnds(probes); if (expectedEnds == null) return; // They cancelled whilst calculating. for (int i = 0; i < expectedEnds.length; i++) { System.err.println("" + i + "\t" + expectedEnds[i]); } // This is where we'll store any hits Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>(); BinomialTest bt = new BinomialTest(); AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED; if (aboveOnly) hypothesis = AlternativeHypothesis.GREATER_THAN; if (belowOnly) hypothesis = AlternativeHypothesis.LESS_THAN; for (int p = 0; p < probes.length; p++) { if (p % 100 == 0) { progressUpdated("Processed " + p + " probes", p, probes.length); } if (cancel) { cancel = false; progressCancelled(); return; } long[] reads = fromStore.getReadsForProbe(probes[p]); int forCount = 0; int revCount = 0; for (int r = 0; r < reads.length; r++) { if (SequenceRead.strand(reads[r]) == Location.FORWARD) { ++forCount; } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) { ++revCount; } } if (forCount + revCount < minObservations) continue; int fromPercent = Math.round((forCount * 100f) / (forCount + revCount)); // We need to calculate the confidence range for the from reads and work // out the most pessimistic value we could take as a starting value WilsonScoreInterval wi = new WilsonScoreInterval(); ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency); // System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100); reads = toStore.getReadsForProbe(probes[p]); forCount = 0; revCount = 0; for (int r = 0; r < reads.length; r++) { if (SequenceRead.strand(reads[r]) == Location.FORWARD) { ++forCount; } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) { ++revCount; } } if (forCount + revCount < minObservations) continue; float toPercent = (forCount * 100f) / (forCount + revCount); // System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]); // Find the most pessimistic fromPercent such that the expected toPercent is as close // to the observed value based on the confidence interval we calculated before. double worseCaseExpectedPercent = 0; double smallestTheoreticalToActualDiff = 100; // Just taking the abs diff can still leave us with a closest value which is still // quite far from where we are. We therefore also check if our confidence interval // gives us a potential value range which spans the actual value, and if it does we // fail it without even running the test. boolean seenLower = false; boolean seenHigher = false; for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math .min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) { double expectedPercent = expectedEnds[m]; double diff = expectedPercent - toPercent; if (diff <= 0) seenLower = true; if (diff >= 0) seenHigher = true; if (Math.abs(diff) < smallestTheoreticalToActualDiff) { worseCaseExpectedPercent = expectedPercent; smallestTheoreticalToActualDiff = Math.abs(diff); } } // System.err.println("Worst case percent is "+worseCaseExpectedPercent+" with diff of "+smallestTheoreticalToActualDiff+" to "+toPercent); // Sanity check if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) { throw new IllegalStateException("Can't have a worst case which is better than the actual"); } if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift) continue; // Check the directionality if (aboveOnly && worseCaseExpectedPercent - toPercent > 0) continue; if (belowOnly && worseCaseExpectedPercent - toPercent < 0) continue; // Now perform the Binomial test. double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d, hypothesis); if (seenLower && seenHigher) pValue = 0.5; // Our confidence range spanned the actual value we had so we can't be significant // System.err.println("P value is "+pValue); // Store this as a potential hit (after correcting p-values later) hits.add(new ProbeTTestValue(probes[p], pValue)); } // Now we can correct the p-values if we need to ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]); if (applyMultipleTestingCorrection) { // System.err.println("Correcting for "+rawHits.length+" tests"); BenjHochFDR.calculateQValues(rawHits); } for (int h = 0; h < rawHits.length; h++) { if (applyMultipleTestingCorrection) { if (rawHits[h].q < stringency) { newList.addProbe(rawHits[h].probe, (float) rawHits[h].q); } } else { if (rawHits[h].p < stringency) { newList.addProbe(rawHits[h].probe, (float) rawHits[h].p); } } } filterFinished(newList); }