Usage examples for org.apache.commons.math3.stat.inference.AlternativeHypothesis.TWO_SIDED
AlternativeHypothesis.TWO_SIDED
To view the source code for org.apache.commons.math3.stat.inference.AlternativeHypothesis.TWO_SIDED, click the Source link.
From source file: edu.usc.cssl.tacit.classify.naivebayes.services.Vectors2Classify.java
/**
 * Trains one or more classifiers on vector instance lists, evaluates them on
 * train/validation/test splits (or cross-validation folds), prints the
 * requested reports to the console, and returns the accumulated summary lines
 * (test accuracy plus a two-sided binomial p-value where one was computed).
 *
 * @param args command-line options processed via MALLET's {@code CommandOption}
 * @return the static {@code result} list, holding the test-accuracy summary
 * @throws bsh.EvalError       if a trainer-constructor expression fails to evaluate
 * @throws java.io.IOException if an instance list cannot be read
 */
public static ArrayList<String> main(String[] args) throws bsh.EvalError, java.io.IOException {
    // Reset static state so repeated invocations don't accumulate stale output.
    result.clear();
    classifierTrainerStrings = new ArrayList<String>();
    ReportOptions = new boolean[][] { { false, false, false, false }, { false, false, false, false },
            { false, false, false, false } };
    double pvalue = 0;

    // Process the command-line options
    CommandOption.setSummary(Vectors2Classify.class,
            "A tool for training, saving and printing diagnostics from a classifier on vectors.");
    CommandOption.process(Vectors2Classify.class, args);

    // handle default trainer here for now; default argument processing doesn't work
    if (!trainerConstructor.wasInvoked()) {
        classifierTrainerStrings.add("new NaiveBayesTrainer()");
    }
    if (!report.wasInvoked()) {
        // Default reports: train accuracy, test accuracy + test confusion matrix.
        ReportOptions = new boolean[][] { { true, false, false, false }, { true, false, true, false },
                { false, false, false, false } };
    }

    int verbosity = verbosityOption.value;
    Logger rootLogger = ((MalletLogger) progressLogger).getRootLogger();
    if (verbosityOption.wasInvoked()) {
        rootLogger.setLevel(MalletLogger.LoggingLevels[verbosity]);
    }
    if (noOverwriteProgressMessagesOption.value == false) {
        // install special formatting for progress messages:
        // find console handler on root logger; change formatter to one
        // that knows about progress messages
        Handler[] handlers = rootLogger.getHandlers();
        for (int i = 0; i < handlers.length; i++) {
            if (handlers[i] instanceof ConsoleHandler) {
                handlers[i].setFormatter(new ProgressMessageLogFormatter());
            }
        }
    }

    // If any per-split file option was given, splits come from separate files
    // rather than from proportionally splitting a single --input-file list.
    boolean separateIlists = testFile.wasInvoked() || trainingFile.wasInvoked() || validationFile.wasInvoked();
    InstanceList ilist = null;
    InstanceList testFileIlist = null;
    InstanceList trainingFileIlist = null;
    InstanceList validationFileIlist = null;

    if (!separateIlists) {
        // normal case, --input-file specified
        // Read in the InstanceList, from stdin if the input filename is "-".
        ilist = InstanceList.load(new File(inputFile.value));
    } else {
        // user specified separate files for testing and training sets.
        trainingFileIlist = InstanceList.load(new File(trainingFile.value));
        logger.info("Training vectors loaded from " + trainingFile.value);
        if (testFile.wasInvoked()) {
            testFileIlist = InstanceList.load(new File(testFile.value));
            logger.info("Testing vectors loaded from " + testFile.value);
            // Feature/label alphabets must agree or evaluation is meaningless.
            if (!testFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) {
                throw new RuntimeException(trainingFileIlist.getPipe().getDataAlphabet() + "\n"
                        + testFileIlist.getPipe().getDataAlphabet() + "\n"
                        + trainingFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + testFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + "Training and testing alphabets don't match!\n");
            }
        }
        if (validationFile.wasInvoked()) {
            validationFileIlist = InstanceList.load(new File(validationFile.value));
            logger.info("validation vectors loaded from " + validationFile.value);
            if (!validationFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) {
                throw new RuntimeException(trainingFileIlist.getPipe().getDataAlphabet() + "\n"
                        + validationFileIlist.getPipe().getDataAlphabet() + "\n"
                        + trainingFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + validationFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + "Training and validation alphabets don't match!\n");
            }
        } else {
            // No validation file: use an empty list so ilists[2] is non-null.
            validationFileIlist = new InstanceList(new cc.mallet.pipe.Noop());
        }
    }

    // Cross-validation overrides the proportion/num-trials options; warn the user.
    if (crossValidation.wasInvoked() && trainingProportionOption.wasInvoked()) {
        logger.warning("Both --cross-validation and --training-portion were invoked. Using cross validation with "
                + crossValidation.value + " folds.");
    }
    if (crossValidation.wasInvoked() && validationProportionOption.wasInvoked()) {
        logger.warning("Both --cross-validation and --validation-portion were invoked. Using cross validation with "
                + crossValidation.value + " folds.");
    }
    if (crossValidation.wasInvoked() && numTrialsOption.wasInvoked()) {
        logger.warning("Both --cross-validation and --num-trials were invoked. Using cross validation with "
                + crossValidation.value + " folds.");
    }

    // Under cross-validation, one trial per fold; otherwise --num-trials.
    int numTrials;
    if (crossValidation.wasInvoked()) {
        numTrials = crossValidation.value;
    } else {
        numTrials = numTrialsOption.value;
    }

    Random r = randomSeedOption.wasInvoked() ? new Random(randomSeedOption.value) : new Random();

    // Per-trainer, per-trial result matrices.
    int numTrainers = classifierTrainerStrings.size();
    double trainAccuracy[][] = new double[numTrainers][numTrials];
    double testAccuracy[][] = new double[numTrainers][numTrials];
    double validationAccuracy[][] = new double[numTrainers][numTrials];
    String trainConfusionMatrix[][] = new String[numTrainers][numTrials];
    String testConfusionMatrix[][] = new String[numTrainers][numTrials];
    String validationConfusionMatrix[][] = new String[numTrainers][numTrials];

    double t = trainingProportionOption.value;
    double v = validationProportionOption.value;
    if (!separateIlists) {
        if (crossValidation.wasInvoked()) {
            logger.info("Cross-validation folds = " + crossValidation.value);
        } else {
            logger.info("Training portion = " + t);
            logger.info(" Unlabeled training sub-portion = " + unlabeledProportionOption.value);
            logger.info("Validation portion = " + v);
            logger.info("Testing portion = " + (1 - v - t));
        }
    }

    CrossValidationIterator cvIter;
    if (crossValidation.wasInvoked()) {
        if (crossValidation.value < 2) {
            throw new RuntimeException(
                    "At least two folds (set with --cross-validation) are required for cross validation");
        }
        cvIter = new CrossValidationIterator(ilist, crossValidation.value, r);
    } else {
        cvIter = null;
    }

    String[] trainerNames = new String[numTrainers];
    for (int trialIndex = 0; trialIndex < numTrials; trialIndex++) {
        System.out.println("\n-------------------- Trial " + trialIndex + " --------------------\n");
        InstanceList[] ilists;
        BitSet unlabeledIndices = null;
        if (!separateIlists) {
            if (crossValidation.wasInvoked()) {
                // ilists = {train fold, test fold, empty validation list}
                InstanceList[] cvSplit = cvIter.next();
                ilists = new InstanceList[3];
                ilists[0] = cvSplit[0];
                ilists[1] = cvSplit[1];
                ilists[2] = cvSplit[0].cloneEmpty();
            } else {
                // Random split: {train t, test 1-t-v, validation v}.
                ilists = ilist.split(r, new double[] { t, 1 - t - v, v });
            }
        } else {
            ilists = new InstanceList[3];
            ilists[0] = trainingFileIlist;
            ilists[1] = testFileIlist;
            ilists[2] = validationFileIlist;
        }

        // Optionally hide labels on a random sub-portion of the training set.
        if (unlabeledProportionOption.value > 0)
            unlabeledIndices = new cc.mallet.util.Randoms(r.nextInt()).nextBitSet(ilists[0].size(),
                    unlabeledProportionOption.value);

        long time[] = new long[numTrainers];
        for (int c = 0; c < numTrainers; c++) {
            time[c] = System.currentTimeMillis();
            ClassifierTrainer trainer = getTrainer(classifierTrainerStrings.get(c));
            trainer.setValidationInstances(ilists[2]);
            ConsoleView.printlInConsoleln("Training " + trainer + " with " + ilists[0].size() + " instances");
            if (unlabeledProportionOption.value > 0)
                ilists[0].hideSomeLabels(unlabeledIndices);
            Classifier classifier = trainer.train(ilists[0]);
            if (unlabeledProportionOption.value > 0)
                ilists[0].unhideAllLabels();
            ConsoleView.printlInConsoleln("Training " + trainer.toString() + " finished");
            time[c] = System.currentTimeMillis() - time[c];

            Trial trainTrial = new Trial(classifier, ilists[0]);
            Trial testTrial = new Trial(classifier, ilists[1]);
            Trial validationTrial = new Trial(classifier, ilists[2]);

            // gdruck - only perform evaluation if requested in report options
            if (ReportOptions[ReportOption.train][ReportOption.confusion] && ilists[0].size() > 0)
                trainConfusionMatrix[c][trialIndex] = new ConfusionMatrix(trainTrial).toString();
            if (ReportOptions[ReportOption.test][ReportOption.confusion] && ilists[1].size() > 0)
                testConfusionMatrix[c][trialIndex] = new ConfusionMatrix(testTrial).toString();
            if (ReportOptions[ReportOption.validation][ReportOption.confusion] && ilists[2].size() > 0)
                validationConfusionMatrix[c][trialIndex] = new ConfusionMatrix(validationTrial).toString();

            if (ReportOptions[ReportOption.train][ReportOption.accuracy])
                trainAccuracy[c][trialIndex] = trainTrial.getAccuracy();
            if (ReportOptions[ReportOption.test][ReportOption.accuracy])
                testAccuracy[c][trialIndex] = testTrial.getAccuracy();
            if (ReportOptions[ReportOption.validation][ReportOption.accuracy])
                validationAccuracy[c][trialIndex] = validationTrial.getAccuracy();

            // Optionally serialize the trained classifier, one file per
            // trainer/trial combination.
            if (outputFile.wasInvoked()) {
                String filename = outputFile.value;
                if (numTrainers > 1)
                    filename = filename + trainer.toString();
                if (numTrials > 1)
                    filename = filename + ".trial" + trialIndex;
                try {
                    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filename));
                    oos.writeObject(classifier);
                    oos.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new IllegalArgumentException("Couldn't write classifier to filename " + filename);
                }
            }

            // New Reporting: raw per-instance classifications, if requested.
            if (ReportOptions[ReportOption.train][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Training Data");
                printTrialClassification(trainTrial);
            }
            if (ReportOptions[ReportOption.test][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Testing Data");
                printTrialClassification(testTrial);
            }
            if (ReportOptions[ReportOption.validation][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Validation Data");
                printTrialClassification(validationTrial);
            }

            // NOTE(review): the cast below binds to getAccuracy() alone, so
            // "(int) testTrial.getAccuracy() * ilists[1].size()" prints 0 or
            // ilists[1].size() rather than the rounded success count — likely
            // a precedence bug in this debug line; confirm intent.
            System.out.println("Bino test vars size " + ilists[1].size() + "and accuracy + "
                    + testTrial.getAccuracy() + " then success "
                    + (int) testTrial.getAccuracy() * ilists[1].size());
            BinomialTest binomtest = new BinomialTest();
            double p = 0.5; // null hypothesis: chance-level accuracy

            // train
            if (ReportOptions[ReportOption.train][ReportOption.confusion]) {
                ConsoleView.printlInConsoleln(trainer.toString() + " Training Data Confusion Matrix");
                if (ilists[0].size() > 0)
                    ConsoleView.printlInConsoleln(trainConfusionMatrix[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.train][ReportOption.accuracy]) {
                pvalue = binomtest.binomialTest(ilists[0].size(),
                        (int) (trainTrial.getAccuracy() * ilists[0].size()), p, AlternativeHypothesis.TWO_SIDED);
                if (pvalue != 0) {
                    // Fold p-values above 0.5 back into [0, 0.5].
                    if (pvalue > 0.5)
                        pvalue = Math.abs(pvalue - 1);
                    ConsoleView.printlInConsoleln("Binomial 2-Sided P value = " + pvalue + "\n");
                }
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " training data accuracy= " + trainAccuracy[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.train][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.train][ReportOption.f1];
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " training data F1(" + label + ") = " + trainTrial.getF1(label));
            }

            // validation
            if (ReportOptions[ReportOption.validation][ReportOption.confusion]) {
                ConsoleView.printlInConsoleln(trainer.toString() + " Validation Data Confusion Matrix");
                if (ilists[2].size() > 0)
                    ConsoleView.printlInConsoleln(validationConfusionMatrix[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.validation][ReportOption.accuracy]) {
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " validation data accuracy= " + validationAccuracy[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.validation][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.validation][ReportOption.f1];
                ConsoleView.printlInConsoleln(trainer.toString() + " validation data F1(" + label + ") = "
                        + validationTrial.getF1(label));
            }

            // test
            if (ReportOptions[ReportOption.test][ReportOption.confusion]) {
                ConsoleView.printlInConsoleln(trainer.toString() + " Test Data Confusion Matrix");
                if (ilists[1].size() > 0)
                    ConsoleView.printlInConsoleln(testConfusionMatrix[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.test][ReportOption.accuracy]) {
                pvalue = binomtest.binomialTest(ilists[1].size(),
                        (int) (testTrial.getAccuracy() * ilists[1].size()), 0.5,
                        AlternativeHypothesis.TWO_SIDED);
                if (pvalue != 0) {
                    if (pvalue > 0.5)
                        pvalue = Math.abs(pvalue - 1);
                    ConsoleView.printlInConsoleln("Binomial 2-Sided P value = " + pvalue + " \n");
                }
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " test data accuracy= " + testAccuracy[c][trialIndex]);
            }
            if (ReportOptions[ReportOption.test][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.test][ReportOption.f1];
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " test data F1(" + label + ") = " + testTrial.getF1(label));
            }
            if (trialIndex == 0)
                trainerNames[c] = trainer.toString();
        } // end for each trainer
    } // end for each trial

    // New reporting: "[train|test|validation]:[accuracy|f1|confusion|raw]".
    // NOTE(review): pvalue here is whatever value the LAST trainer/trial
    // left behind; with multiple trainers or trials it does not correspond
    // to the per-trainer means being printed — confirm intent.
    for (int c = 0; c < numTrainers; c++) {
        ConsoleView.printlInConsole("\n" + trainerNames[c].toString() + "\n");
        if (ReportOptions[ReportOption.train][ReportOption.accuracy]) {
            String trainResult = "";
            // NOTE(review): both branches below build the identical string —
            // the pvalue test is currently a no-op here.
            if (pvalue != 0)
                trainResult += "Summary. train accuracy = " + MatrixOps.mean(trainAccuracy[c]);
            else
                trainResult += "Summary. train accuracy = " + MatrixOps.mean(trainAccuracy[c]);
            if (numTrials > 1) {
                trainResult += " stddev = " + MatrixOps.stddev(trainAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(trainAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(trainResult);
        }
        if (ReportOptions[ReportOption.validation][ReportOption.accuracy]) {
            String validationResult = "";
            // NOTE(review): identical branches again — see note above.
            if (pvalue != 0)
                validationResult += "Summary. validation accuracy = " + MatrixOps.mean(validationAccuracy[c]);
            else
                validationResult += "Summary. validation accuracy = " + MatrixOps.mean(validationAccuracy[c]);
            if (numTrials > 1) {
                validationResult += " stddev = " + MatrixOps.stddev(validationAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(validationAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(validationResult);
        }
        if (ReportOptions[ReportOption.test][ReportOption.accuracy]) {
            String testResult = "";
            // pvalue == 0 means the p-value underflowed double precision.
            if (pvalue != 0)
                testResult += "Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Binomial 2-Sided Pvalue = " + pvalue;
            else
                testResult += "Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Pvalue < 10^(-1022)\n";
            if (numTrials > 1) {
                testResult += " stddev = " + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(testAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(testResult);
        }
        // If we are testing the classifier with two folders, result will be
        // empty - no report is generated
        if (result.isEmpty()) {
            if (pvalue != 0)
                result.add("Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Binomial 2-Sided Pvalue = " + pvalue);
            else
                result.add("Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Pvalue < 10^(-1022)\n");
            if (numTrials > 1) {
                result.add(" stddev = " + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(testAccuracy[c]));
            }
        }
    } // end for each trainer
    return result;
}
From source file: org.lemurproject.galago.core.eval.compare.SignTest.java
@Override public double evaluate(double[] baseline, double[] treatment) { int treatmentIsBetter = 0; int different = 0; for (int i = 0; i < treatment.length; i++) { double boostedBaseline = baseline[i] * boost; if (treatment[i] > boostedBaseline) { treatmentIsBetter++;/* w w w. ja v a 2 s. c o m*/ } if (Math.abs(treatment[i] - boostedBaseline) > tolerance) { different++; } } //- A sign test is pointless if there are no difference between // test pairs. Just bail out here if (different == 0) { return 1.0; } double pvalue; //pvalue = Stat.binomialProb(0.5, different, treatmentIsBetter); //- Use Apache Commons Math3 Binomial (Sign) Test directly. Assume // generic, two-sided test. For one-sided test, change to // AlternativeHypothesis GREATER_THAN (right-sided) or // LESS_THAN (left-sided) tests. BinomialTest bt = new BinomialTest(); pvalue = bt.binomialTest(different, treatmentIsBetter, 0.5, AlternativeHypothesis.TWO_SIDED); return pvalue; }
From source file: uk.ac.babraham.SeqMonk.Filters.BinomialFilterForRev.java
/**
 * Builds the filtered probe list: for each probe, compares the observed
 * forward-read percentage in {@code toStore} against the percentage predicted
 * from {@code fromStore} (via {@code calculateEnds}), using a binomial test
 * against the most pessimistic prediction inside a Wilson confidence interval.
 * Probes passing the stringency cutoff (p- or BH-corrected q-value) are added
 * to the new list, which is handed to {@code filterFinished}.
 */
protected void generateProbeList() {
    // Direction constraint from the UI: "Above"/"Below" restrict the
    // alternative hypothesis; anything else means two-sided.
    boolean aboveOnly = false;
    boolean belowOnly = false;
    if (options.directionBox.getSelectedItem().equals("Above"))
        aboveOnly = true;
    else if (options.directionBox.getSelectedItem().equals("Below"))
        belowOnly = true;

    // Read thresholds from the options panel, falling back to defaults
    // when a field is left empty.
    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();

    // The stored score is a q-value when BH correction is on, else a p-value.
    ProbeList newList;
    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }

    Probe[] probes = startingList.getAllProbes();

    // We need to create a set of mean end methylation values for all starting
    // values. We round to the nearest percent so we'll end up with a set of
    // 101 values (0-100) which are the expected end points.
    double[] expectedEnds = calculateEnds(probes);
    if (expectedEnds == null)
        return; // They cancelled whilst calculating.

    // Debug dump of the expected-end lookup table.
    for (int i = 0; i < expectedEnds.length; i++) {
        System.err.println("" + i + "\t" + expectedEnds[i]);
    }

    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
    BinomialTest bt = new BinomialTest();
    AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
    if (aboveOnly)
        hypothesis = AlternativeHypothesis.GREATER_THAN;
    if (belowOnly)
        hypothesis = AlternativeHypothesis.LESS_THAN;

    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        // Count forward/reverse reads for this probe in the "from" store.
        long[] reads = fromStore.getReadsForProbe(probes[p]);
        int forCount = 0;
        int revCount = 0;
        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }
        if (forCount + revCount < minObservations)
            continue; // too few reads to test

        int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));

        // We need to calculate the confidence range for the from reads and
        // work out the most pessimistic value we could take as a starting
        // value.
        WilsonScoreInterval wi = new WilsonScoreInterval();
        ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);

        // Re-count in the "to" store for the same probe.
        reads = toStore.getReadsForProbe(probes[p]);
        forCount = 0;
        revCount = 0;
        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }
        if (forCount + revCount < minObservations)
            continue;

        float toPercent = (forCount * 100f) / (forCount + revCount);

        // Find the most pessimistic fromPercent such that the expected
        // toPercent is as close to the observed value as the confidence
        // interval we calculated before allows.
        double worseCaseExpectedPercent = 0;
        double smallestTheoreticalToActualDiff = 100;

        // Just taking the abs diff can still leave us with a closest value
        // which is still quite far from where we are. We therefore also check
        // if our confidence interval gives us a potential value range which
        // spans the actual value, and if it does we fail it without even
        // running the test.
        boolean seenLower = false;
        boolean seenHigher = false;
        for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math
                .min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
            double expectedPercent = expectedEnds[m];
            double diff = expectedPercent - toPercent;
            if (diff <= 0)
                seenLower = true;
            if (diff >= 0)
                seenHigher = true;
            if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
                worseCaseExpectedPercent = expectedPercent;
                smallestTheoreticalToActualDiff = Math.abs(diff);
            }
        }

        // Sanity check: the worst case can never beat the point estimate.
        if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
            throw new IllegalStateException("Can't have a worst case which is better than the actual");
        }

        // Require at least the configured minimum percentage shift.
        if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
            continue;

        // Check the directionality.
        if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
            continue;
        if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
            continue;

        // Now perform the Binomial test.
        double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d,
                hypothesis);

        if (seenLower && seenHigher)
            pValue = 0.5; // Our confidence range spanned the actual value we had so we can't be significant

        // Store this as a potential hit (p-values corrected later).
        hits.add(new ProbeTTestValue(probes[p], pValue));
    }

    // Now we can correct the p-values if we need to
    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        BenjHochFDR.calculateQValues(rawHits);
    }

    // Keep every hit whose (corrected) significance beats the stringency cutoff.
    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }
    filterFinished(newList);
}