Example usage for org.apache.commons.math3.stat.inference BinomialTest BinomialTest

Introduction

On this page you can find example usages of the org.apache.commons.math3.stat.inference.BinomialTest constructor, BinomialTest().

Prototype

public BinomialTest()
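
A minimal, self-contained sketch of typical use of this no-argument constructor (the trial counts and the hypothesized success probability below are illustrative values, not taken from the examples on this page):

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

public class BinomialTestExample {
    public static void main(String[] args) {
        // BinomialTest keeps no state, so a single instance can be reused for many tests.
        BinomialTest binomialTest = new BinomialTest();

        // Illustrative data: 550 successes in 1000 trials, null success probability 0.5.
        int numberOfTrials = 1000;
        int numberOfSuccesses = 550;
        double nullProbability = 0.5;

        // Two-sided p-value for observing a count at least this extreme under the null hypothesis.
        double pValue = binomialTest.binomialTest(numberOfTrials, numberOfSuccesses, nullProbability,
                AlternativeHypothesis.TWO_SIDED);
        System.out.println("p-value = " + pValue);

        // Boolean overload: can the null hypothesis be rejected at significance level 0.05?
        boolean significant = binomialTest.binomialTest(numberOfTrials, numberOfSuccesses, nullProbability,
                AlternativeHypothesis.TWO_SIDED, 0.05);
        System.out.println("significant at alpha 0.05 = " + significant);
    }
}

Each example below constructs the object in the same way, new BinomialTest(), and differs only in the counts and the AlternativeHypothesis passed to binomialTest.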

Usage

From source file:cn.edu.pku.cbi.mosaichunter.filter.CompleteLinkageFilter.java

private boolean doFilter(Site site, SAMRecord[] reads) {
    String chrName = site.getRefName();
    int minReadQuality = ConfigManager.getInstance().getInt(null, "min_read_quality", 0);
    int minMappingQuality = ConfigManager.getInstance().getInt(null, "min_mapping_quality", 0);

    // find out all related positions
    Map<Integer, PositionEntry> positions = new HashMap<Integer, PositionEntry>();
    for (int i = 0; i < site.getDepth(); ++i) {

        if (reads[i] == null || !chrName.equals(reads[i].getReferenceName())) {
            continue;
        }
        byte base = site.getBases()[i];
        if (base != site.getMajorAllele() && base != site.getMinorAllele()) {
            continue;
        }
        boolean isMajor = base == site.getMajorAllele();
        for (int j = 0; j < reads[i].getReadLength(); ++j) {
            if (reads[i].getBaseQualities()[j] < minReadQuality) {
                continue;
            }
            if (reads[i].getMappingQuality() < minMappingQuality) {
                continue;
            }
            int id = MosaicHunterHelper.BASE_TO_ID[reads[i].getReadBases()[j]];
            if (id < 0) {
                continue;
            }
            int pos = reads[i].getReferencePositionAtReadPosition(j + 1);
            if (pos == site.getRefPos()) {
                continue;
            }
            PositionEntry entry = positions.get(pos);
            if (entry == null) {
                entry = new PositionEntry();
                positions.put(pos, entry);
            }
            entry.count[id]++;
            if (isMajor) {
                entry.majorCount[id]++;
            } else {
                entry.minorCount[id]++;
            }
        }
    }

    // for each position
    for (Integer pos : positions.keySet()) {
        PositionEntry entry = positions.get(pos);
        int[] ids = MosaicHunterHelper.sortAlleleCount(entry.count);
        int majorId = ids[0];
        int minorId = ids[1];

        int diagonalSum1 = entry.majorCount[majorId] + entry.minorCount[minorId];
        int diagonalSum2 = entry.majorCount[minorId] + entry.minorCount[majorId];
        int totalSum = diagonalSum1 + diagonalSum2;
        int smallDiagonalSum = diagonalSum1;
        if (diagonalSum2 < diagonalSum1) {
            smallDiagonalSum = diagonalSum2;
        }

        if (new BinomialTest().binomialTest(totalSum, smallDiagonalSum, this.binomErrorRate,
                AlternativeHypothesis.GREATER_THAN) < binomPValueCutoff) {
            continue;
        }

        double p = FishersExactTest.twoSided(entry.majorCount[majorId], entry.majorCount[minorId],
                entry.minorCount[majorId], entry.minorCount[minorId]);

        if (p < fisherPValueCutoff) {
            char major1 = (char) site.getMajorAllele();
            char minor1 = (char) site.getMinorAllele();
            char major2 = (char) MosaicHunterHelper.ID_TO_BASE[majorId];
            char minor2 = (char) MosaicHunterHelper.ID_TO_BASE[minorId];
            site.setMetadata(getName(),
                    new Object[] { pos, "" + major1 + major2 + ":" + entry.majorCount[majorId],
                            "" + major1 + minor2 + ":" + entry.majorCount[minorId],
                            "" + minor1 + major2 + ":" + entry.minorCount[majorId],
                            "" + minor1 + minor2 + ":" + entry.minorCount[minorId], p });
            return false;
        }

    }
    return true;
}
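
In the filter above, the binomial test is applied with AlternativeHypothesis.GREATER_THAN before the Fisher's exact test: positions where the smaller diagonal of the 2x2 allele-linkage table already exceeds what the configured error rate would explain are skipped. A reduced sketch of just that check, with made-up counts and placeholder values standing in for the binomErrorRate and binomPValueCutoff fields used above:

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

public class LinkageBinomialCheck {
    public static void main(String[] args) {
        // Made-up diagonal sums of the 2x2 allele-linkage table.
        int diagonalSum1 = 95;
        int diagonalSum2 = 4;
        int totalSum = diagonalSum1 + diagonalSum2;
        int smallDiagonalSum = Math.min(diagonalSum1, diagonalSum2);

        // Placeholder values standing in for this.binomErrorRate and binomPValueCutoff.
        double binomErrorRate = 0.01;
        double binomPValueCutoff = 0.01;

        // One-sided test: is the smaller diagonal greater than expected from the error rate alone?
        double p = new BinomialTest().binomialTest(totalSum, smallDiagonalSum, binomErrorRate,
                AlternativeHypothesis.GREATER_THAN);

        // Mirrors the "continue" above: such positions are skipped before Fisher's exact test.
        boolean skipPosition = p < binomPValueCutoff;
        System.out.println("p = " + p + ", skip position = " + skipPosition);
    }
}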

From source file:edu.usc.cssl.tacit.classify.naivebayes.services.Vectors2Classify.java

public static ArrayList<String> main(String[] args) throws bsh.EvalError, java.io.IOException {
    result.clear();
    classifierTrainerStrings = new ArrayList<String>();
    ReportOptions = new boolean[][] { { false, false, false, false }, { false, false, false, false },
            { false, false, false, false } };

    double pvalue = 0;
    // Process the command-line options
    CommandOption.setSummary(Vectors2Classify.class,
            "A tool for training, saving and printing diagnostics from a classifier on vectors.");
    CommandOption.process(Vectors2Classify.class, args);

    // handle default trainer here for now; default argument processing
    // doesn't work
    if (!trainerConstructor.wasInvoked()) {
        classifierTrainerStrings.add("new NaiveBayesTrainer()");
    }

    if (!report.wasInvoked()) {
        ReportOptions = new boolean[][] { { true, false, false, false }, { true, false, true, false },
                { false, false, false, false } };
        //report.postParsing(null); // force postprocessing of default value

    }

    int verbosity = verbosityOption.value;

    Logger rootLogger = ((MalletLogger) progressLogger).getRootLogger();

    if (verbosityOption.wasInvoked()) {
        rootLogger.setLevel(MalletLogger.LoggingLevels[verbosity]);
    }

    if (noOverwriteProgressMessagesOption.value == false) {
        // install special formatting for progress messages
        // find console handler on root logger; change formatter to one
        // that knows about progress messages
        Handler[] handlers = rootLogger.getHandlers();
        for (int i = 0; i < handlers.length; i++) {
            if (handlers[i] instanceof ConsoleHandler) {
                handlers[i].setFormatter(new ProgressMessageLogFormatter());
            }
        }
    }

    boolean separateIlists = testFile.wasInvoked() || trainingFile.wasInvoked() || validationFile.wasInvoked();
    InstanceList ilist = null;
    InstanceList testFileIlist = null;
    InstanceList trainingFileIlist = null;
    InstanceList validationFileIlist = null;

    if (!separateIlists) { // normal case, --input-file specified
        // Read in the InstanceList, from stdin if the input filename is
        // "-".
        ilist = InstanceList.load(new File(inputFile.value));
        //ilist = new InstanceList(ilist.getAlphabet(), ilist.getAlphabet());
    } else { // user specified separate files for testing and training sets.
        trainingFileIlist = InstanceList.load(new File(trainingFile.value));
        logger.info("Training vectors loaded from " + trainingFile.value);

        if (testFile.wasInvoked()) {
            testFileIlist = InstanceList.load(new File(testFile.value));
            logger.info("Testing vectors loaded from " + testFile.value);

            if (!testFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) {
                throw new RuntimeException(trainingFileIlist.getPipe().getDataAlphabet() + "\n"
                        + testFileIlist.getPipe().getDataAlphabet() + "\n"
                        + trainingFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + testFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + "Training and testing alphabets don't match!\n");
            }
        }

        if (validationFile.wasInvoked()) {
            validationFileIlist = InstanceList.load(new File(validationFile.value));
            logger.info("validation vectors loaded from " + validationFile.value);
            if (!validationFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) {
                throw new RuntimeException(trainingFileIlist.getPipe().getDataAlphabet() + "\n"
                        + validationFileIlist.getPipe().getDataAlphabet() + "\n"
                        + trainingFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + validationFileIlist.getPipe().getTargetAlphabet() + "\n"
                        + "Training and validation alphabets don't match!\n");
            }
        } else {
            validationFileIlist = new InstanceList(new cc.mallet.pipe.Noop());
        }

    }

    if (crossValidation.wasInvoked() && trainingProportionOption.wasInvoked()) {
        logger.warning(
                "Both --cross-validation and --training-portion were invoked.  Using cross validation with "
                        + crossValidation.value + " folds.");
    }
    if (crossValidation.wasInvoked() && validationProportionOption.wasInvoked()) {
        logger.warning(
                "Both --cross-validation and --validation-portion were invoked.  Using cross validation with "
                        + crossValidation.value + " folds.");
    }
    if (crossValidation.wasInvoked() && numTrialsOption.wasInvoked()) {
        logger.warning("Both --cross-validation and --num-trials were invoked.  Using cross validation with "
                + crossValidation.value + " folds.");
    }

    int numTrials;
    if (crossValidation.wasInvoked()) {
        numTrials = crossValidation.value;
    } else {
        numTrials = numTrialsOption.value;
    }

    Random r = randomSeedOption.wasInvoked() ? new Random(randomSeedOption.value) : new Random();

    int numTrainers = classifierTrainerStrings.size();

    double trainAccuracy[][] = new double[numTrainers][numTrials];
    double testAccuracy[][] = new double[numTrainers][numTrials];
    double validationAccuracy[][] = new double[numTrainers][numTrials];

    String trainConfusionMatrix[][] = new String[numTrainers][numTrials];
    String testConfusionMatrix[][] = new String[numTrainers][numTrials];
    String validationConfusionMatrix[][] = new String[numTrainers][numTrials];

    double t = trainingProportionOption.value;
    double v = validationProportionOption.value;

    if (!separateIlists) {
        if (crossValidation.wasInvoked()) {
            logger.info("Cross-validation folds = " + crossValidation.value);
        } else {
            logger.info("Training portion = " + t);
            logger.info(" Unlabeled training sub-portion = " + unlabeledProportionOption.value);
            logger.info("Validation portion = " + v);
            logger.info("Testing portion = " + (1 - v - t));
        }
    }

    // for (int i=0; i<3; i++){
    // for (int j=0; j<4; j++){
    // System.out.print(" " + ReportOptions[i][j]);
    // }
    // System.out.println();
    // }

    CrossValidationIterator cvIter;
    if (crossValidation.wasInvoked()) {
        if (crossValidation.value < 2) {
            throw new RuntimeException(
                    "At least two folds (set with --cross-validation) are required for cross validation");
        }
        //System.out.println("Alphabets : "+ ilist.getDataAlphabet() +":"+ ilist.getTargetAlphabet());
        cvIter = new CrossValidationIterator(ilist, crossValidation.value, r);
    } else {
        cvIter = null;
    }

    String[] trainerNames = new String[numTrainers];
    for (int trialIndex = 0; trialIndex < numTrials; trialIndex++) {
        System.out.println("\n-------------------- Trial " + trialIndex + "  --------------------\n");
        InstanceList[] ilists;
        BitSet unlabeledIndices = null;
        if (!separateIlists) {
            if (crossValidation.wasInvoked()) {
                InstanceList[] cvSplit = cvIter.next();
                ilists = new InstanceList[3];
                ilists[0] = cvSplit[0];
                ilists[1] = cvSplit[1];
                ilists[2] = cvSplit[0].cloneEmpty();
            } else {
                ilists = ilist.split(r, new double[] { t, 1 - t - v, v });
            }
        } else {
            ilists = new InstanceList[3];
            ilists[0] = trainingFileIlist;
            ilists[1] = testFileIlist;
            ilists[2] = validationFileIlist;
        }

        if (unlabeledProportionOption.value > 0)
            unlabeledIndices = new cc.mallet.util.Randoms(r.nextInt()).nextBitSet(ilists[0].size(),
                    unlabeledProportionOption.value);

        // InfoGain ig = new InfoGain (ilists[0]);
        // int igl = Math.min (10, ig.numLocations());
        // for (int i = 0; i < igl; i++)
        // System.out.println
        // ("InfoGain["+ig.getObjectAtRank(i)+"]="+ig.getValueAtRank(i));
        // ig.print();

        // FeatureSelection selectedFeatures = new FeatureSelection (ig,
        // 8000);
        // ilists[0].setFeatureSelection (selectedFeatures);
        // OddsRatioFeatureInducer orfi = new OddsRatioFeatureInducer
        // (ilists[0]);
        // orfi.induceFeatures (ilists[0], false, true);

        // System.out.println
        // ("Training with "+ilists[0].size()+" instances");
        long time[] = new long[numTrainers];
        for (int c = 0; c < numTrainers; c++) {
            time[c] = System.currentTimeMillis();
            ClassifierTrainer trainer = getTrainer(classifierTrainerStrings.get(c));
            trainer.setValidationInstances(ilists[2]);
            // ConsoleView.writeInConsole("Trial " + trialIndex + " Training " + trainer + " with " + ilists[0].size() + " instances");
            ConsoleView.printlInConsoleln("Training " + trainer + " with " + ilists[0].size() + " instances");
            if (unlabeledProportionOption.value > 0)
                ilists[0].hideSomeLabels(unlabeledIndices);
            Classifier classifier = trainer.train(ilists[0]);
            if (unlabeledProportionOption.value > 0)
                ilists[0].unhideAllLabels();

            //ConsoleView.writeInConsole("Trial " + trialIndex + " Training " + trainer.toString() + " finished");
            ConsoleView.printlInConsoleln("Training " + trainer.toString() + " finished");
            time[c] = System.currentTimeMillis() - time[c];
            Trial trainTrial = new Trial(classifier, ilists[0]);
            // assert (ilists[1].size() > 0);
            Trial testTrial = new Trial(classifier, ilists[1]);
            Trial validationTrial = new Trial(classifier, ilists[2]);

            // gdruck - only perform evaluation if requested in report
            // options
            if (ReportOptions[ReportOption.train][ReportOption.confusion] && ilists[0].size() > 0)
                trainConfusionMatrix[c][trialIndex] = new ConfusionMatrix(trainTrial).toString();
            if (ReportOptions[ReportOption.test][ReportOption.confusion] && ilists[1].size() > 0)
                testConfusionMatrix[c][trialIndex] = new ConfusionMatrix(testTrial).toString();
            if (ReportOptions[ReportOption.validation][ReportOption.confusion] && ilists[2].size() > 0)
                validationConfusionMatrix[c][trialIndex] = new ConfusionMatrix(validationTrial).toString();

            // gdruck - only perform evaluation if requested in report
            // options
            if (ReportOptions[ReportOption.train][ReportOption.accuracy])
                trainAccuracy[c][trialIndex] = trainTrial.getAccuracy();
            if (ReportOptions[ReportOption.test][ReportOption.accuracy])
                testAccuracy[c][trialIndex] = testTrial.getAccuracy();
            if (ReportOptions[ReportOption.validation][ReportOption.accuracy])
                validationAccuracy[c][trialIndex] = validationTrial.getAccuracy();

            if (outputFile.wasInvoked()) {
                String filename = outputFile.value;
                if (numTrainers > 1)
                    filename = filename + trainer.toString();
                if (numTrials > 1)
                    filename = filename + ".trial" + trialIndex;
                try {
                    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(filename));
                    oos.writeObject(classifier);
                    oos.close();
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new IllegalArgumentException("Couldn't write classifier to filename " + filename);
                }
            }

            // New Reporting

            // raw output
            if (ReportOptions[ReportOption.train][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Training Data");
                printTrialClassification(trainTrial);
            }

            if (ReportOptions[ReportOption.test][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Testing Data");
                printTrialClassification(testTrial);
                //System.out.println("Report Option :"+(ReportOptions[ReportOption.test][ReportOption.raw]));
            }

            if (ReportOptions[ReportOption.validation][ReportOption.raw]) {
                System.out.println("Trial " + trialIndex + " Trainer " + trainer.toString());
                System.out.println(" Raw Validation Data");
                printTrialClassification(validationTrial);
            }
            System.out.println("Binomial test inputs: size " + ilists[1].size() + ", accuracy "
                    + testTrial.getAccuracy() + ", successes "
                    + (int) (testTrial.getAccuracy() * ilists[1].size()));
            BinomialTest binomtest = new BinomialTest();
            double p = 0.5;

            // train
            if (ReportOptions[ReportOption.train][ReportOption.confusion]) {
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer "    + trainer.toString() + " Training Data Confusion Matrix");
                ConsoleView.printlInConsoleln(trainer.toString() + " Training Data Confusion Matrix");
                if (ilists[0].size() > 0)
                    ConsoleView.printlInConsoleln(trainConfusionMatrix[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.train][ReportOption.accuracy]) {
                pvalue = binomtest.binomialTest(ilists[0].size(),
                        (int) (trainTrial.getAccuracy() * ilists[0].size()), p,
                        AlternativeHypothesis.TWO_SIDED);
                if (pvalue != 0) {
                    if (pvalue > 0.5)
                        pvalue = Math.abs(pvalue - 1);
                    ConsoleView.printlInConsoleln("Binomial 2-Sided P value = " + pvalue + "\n");
                }

                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " training data accuracy= " + trainAccuracy[c][trialIndex]);
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " training data accuracy= " + trainAccuracy[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.train][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.train][ReportOption.f1];
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer "+ trainer.toString() + " training data F1(" + label + ") = " + trainTrial.getF1(label));
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " training data F1(" + label + ") = " + trainTrial.getF1(label));
            }

            // validation
            if (ReportOptions[ReportOption.validation][ReportOption.confusion]) {
                //   ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " Validation Data Confusion Matrix");
                ConsoleView.printlInConsoleln(trainer.toString() + " Validation Data Confusion Matrix");
                if (ilists[2].size() > 0)
                    ConsoleView.printlInConsoleln(validationConfusionMatrix[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.validation][ReportOption.accuracy]) {
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " validation data accuracy= " + validationAccuracy[c][trialIndex]);
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " validation data accuracy= " + validationAccuracy[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.validation][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.validation][ReportOption.f1];
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " validation data F1(" + label + ") = " + validationTrial.getF1(label));
                ConsoleView.printlInConsoleln(trainer.toString() + " validation data F1(" + label + ") = "
                        + validationTrial.getF1(label));
            }

            // test
            if (ReportOptions[ReportOption.test][ReportOption.confusion]) {
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " Test Data Confusion Matrix");
                ConsoleView.printlInConsoleln(trainer.toString() + " Test Data Confusion Matrix");
                if (ilists[1].size() > 0)
                    ConsoleView.printlInConsoleln(testConfusionMatrix[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.test][ReportOption.accuracy]) {
                pvalue = binomtest.binomialTest(ilists[1].size(),
                        (int) (testTrial.getAccuracy() * ilists[1].size()), 0.5,
                        AlternativeHypothesis.TWO_SIDED);
                if (pvalue != 0) {
                    if (pvalue > 0.5)
                        pvalue = Math.abs(pvalue - 1);
                    ConsoleView.printlInConsoleln("Binomial 2-Sided P value = " + pvalue + " \n");
                }

                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " test data accuracy= " + testAccuracy[c][trialIndex]);
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " test data accuracy= " + testAccuracy[c][trialIndex]);
            }

            if (ReportOptions[ReportOption.test][ReportOption.f1]) {
                String label = ReportOptionArgs[ReportOption.test][ReportOption.f1];
                //ConsoleView.writeInConsole("Trial " + trialIndex + " Trainer " + trainer.toString() + " test data F1(" + label + ") = " + testTrial.getF1(label));
                ConsoleView.printlInConsoleln(
                        trainer.toString() + " test data F1(" + label + ") = " + testTrial.getF1(label));
            }

            if (trialIndex == 0)
                trainerNames[c] = trainer.toString();

        } // end for each trainer
    } // end for each trial

    // New reporting
    // "[train|test|validation]:[accuracy|f1|confusion|raw]"
    for (int c = 0; c < numTrainers; c++) {
        ConsoleView.printlInConsole("\n" + trainerNames[c].toString() + "\n");
        if (ReportOptions[ReportOption.train][ReportOption.accuracy]) {
            /*ConsoleView.printlInConsoleln("Summary. train accuracy mean = "
                  + MatrixOps.mean(trainAccuracy[c]) + " stddev = "
                  + MatrixOps.stddev(trainAccuracy[c]) + " stderr = "
                  + MatrixOps.stderr(trainAccuracy[c])); */

            String trainResult = "";
            if (pvalue != 0)
                trainResult += "Summary. train accuracy = " + MatrixOps.mean(trainAccuracy[c]);
            else
                trainResult += "Summary. train accuracy = " + MatrixOps.mean(trainAccuracy[c]);

            if (numTrials > 1) {
                trainResult += " stddev = " + MatrixOps.stddev(trainAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(trainAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(trainResult);

        }

        if (ReportOptions[ReportOption.validation][ReportOption.accuracy]) {
            /*
            ConsoleView.printlInConsoleln("Summary. validation accuracy mean = "
                  + MatrixOps.mean(validationAccuracy[c]) + " stddev = "
                  + MatrixOps.stddev(validationAccuracy[c])
                  + " stderr = "
                  + MatrixOps.stderr(validationAccuracy[c]));*/

            String validationResult = "Summary. validation accuracy = " + MatrixOps.mean(validationAccuracy[c]);

            if (numTrials > 1) {
                validationResult += " stddev = " + MatrixOps.stddev(validationAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(validationAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(validationResult);

        }

        if (ReportOptions[ReportOption.test][ReportOption.accuracy]) {
            String testResult = "";
            if (pvalue != 0)
                testResult += "Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Binomial 2-Sided Pvalue = " + pvalue;
            else
                testResult += "Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Pvalue < 10^(-1022)\n";

            if (numTrials > 1) {
                testResult += " stddev = " + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(testAccuracy[c]);
            }
            ConsoleView.printlInConsoleln(testResult);

            /*
            if (pvalue != 0)
               ConsoleView.printlInConsoleln("Summary. test accuracy mean = "
             + MatrixOps.mean(testAccuracy[c]) + " stddev = "
             + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
             + MatrixOps.stderr(testAccuracy[c]) + " pvalue = "
             + pvalue);
            else
               ConsoleView.printlInConsoleln("Summary. test accuracy mean = "
             + MatrixOps.mean(testAccuracy[c]) + " stddev = "
             + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
             + MatrixOps.stderr(testAccuracy[c])
             + " P value < 10^(-1022)\n"); */
        }

        // If we are testing the classifier with two folders, result will be
        // empty - no report is generated
        if (result.isEmpty()) {
            if (pvalue != 0)
                result.add("Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Binomial 2-Sided  Pvalue = " + pvalue);
            else
                result.add("Summary. test accuracy = " + MatrixOps.mean(testAccuracy[c])
                        + " Pvalue < 10^(-1022)\n");

            if (numTrials > 1) {
                result.add(" stddev = " + MatrixOps.stddev(testAccuracy[c]) + " stderr = "
                        + MatrixOps.stderr(testAccuracy[c]));
            }
        }
    } // end for each trainer

    return result;
}
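
The accuracy reporting above uses binomialTest with a null probability of 0.5 to ask whether the classifier's accuracy differs from chance on a balanced two-class problem. A stripped-down sketch of that check with made-up numbers (omitting the p-value folding applied in the code above):

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

public class AccuracyVsChance {
    public static void main(String[] args) {
        // Made-up evaluation results: 120 test instances, 78% accuracy.
        int testSetSize = 120;
        double accuracy = 0.78;
        int correct = (int) (accuracy * testSetSize);

        // Two-sided binomial test against the 0.5 chance level of a balanced binary task.
        double pValue = new BinomialTest().binomialTest(testSetSize, correct, 0.5,
                AlternativeHypothesis.TWO_SIDED);
        System.out.println("Binomial 2-sided p-value = " + pValue);
    }
}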

From source file:org.lemurproject.galago.core.eval.compare.SignTest.java

@Override
public double evaluate(double[] baseline, double[] treatment) {
    int treatmentIsBetter = 0;
    int different = 0;

    for (int i = 0; i < treatment.length; i++) {
        double boostedBaseline = baseline[i] * boost;
        if (treatment[i] > boostedBaseline) {
            treatmentIsBetter++;
        }
        if (Math.abs(treatment[i] - boostedBaseline) > tolerance) {
            different++;
        }
    }

    //- A sign test is pointless if there are no differences between
    //  test pairs.  Just bail out here.
    if (different == 0) {
        return 1.0;
    }

    double pvalue;
    //pvalue = Stat.binomialProb(0.5, different, treatmentIsBetter);

    //- Use Apache Commons Math3 Binomial (Sign) Test directly.  Assume
    //  generic, two-sided test.  For one-sided test, change to
    //  AlternativeHypothesis GREATER_THAN (right-sided) or
    //  LESS_THAN (left-sided) tests.
    BinomialTest bt = new BinomialTest();
    pvalue = bt.binomialTest(different, treatmentIsBetter, 0.5, AlternativeHypothesis.TWO_SIDED);
    return pvalue;
}
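
As the comment in evaluate() notes, the sign test reduces to a binomial test on the number of pairs where the treatment beats the (boosted) baseline, out of the pairs that differ at all. A standalone sketch of the same mapping over two made-up score arrays, with the boost and tolerance fields replaced by literal values:

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

public class SignTestSketch {
    public static void main(String[] args) {
        // Made-up per-query scores for a baseline and a treatment run.
        double[] baseline = { 0.40, 0.55, 0.30, 0.62, 0.48 };
        double[] treatment = { 0.45, 0.53, 0.37, 0.70, 0.48 };
        double boost = 1.0;       // stands in for the boost field above
        double tolerance = 1e-6;  // stands in for the tolerance field above

        int treatmentIsBetter = 0;
        int different = 0;
        for (int i = 0; i < treatment.length; i++) {
            double boostedBaseline = baseline[i] * boost;
            if (treatment[i] > boostedBaseline) {
                treatmentIsBetter++;
            }
            if (Math.abs(treatment[i] - boostedBaseline) > tolerance) {
                different++;
            }
        }

        // With no differing pairs the sign test is meaningless; report p = 1.0, as above.
        double pValue = (different == 0) ? 1.0
                : new BinomialTest().binomialTest(different, treatmentIsBetter, 0.5,
                        AlternativeHypothesis.TWO_SIDED);
        System.out.println("sign test p-value = " + pValue);
    }
}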

From source file:uk.ac.babraham.SeqMonk.Filters.BinomialFilterForRev.java

protected void generateProbeList() {

    boolean aboveOnly = false;
    boolean belowOnly = false;

    if (options.directionBox.getSelectedItem().equals("Above"))
        aboveOnly = true;
    else if (options.directionBox.getSelectedItem().equals("Below"))
        belowOnly = true;

    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }

    applyMultipleTestingCorrection = options.multiTestBox.isSelected();

    ProbeList newList;

    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }

    Probe[] probes = startingList.getAllProbes();

    // We need to create a set of mean end methylation values for all starting values
    // We round to the nearest percent so we'll end up with a set of 101 values (0-100)
    // which are the expected end points
    double[] expectedEnds = calculateEnds(probes);

    if (expectedEnds == null)
        return; // They cancelled whilst calculating.

    for (int i = 0; i < expectedEnds.length; i++) {
        System.err.println("" + i + "\t" + expectedEnds[i]);
    }

    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
    BinomialTest bt = new BinomialTest();
    AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;

    if (aboveOnly)
        hypothesis = AlternativeHypothesis.GREATER_THAN;
    if (belowOnly)
        hypothesis = AlternativeHypothesis.LESS_THAN;

    for (int p = 0; p < probes.length; p++) {

        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }

        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        long[] reads = fromStore.getReadsForProbe(probes[p]);

        int forCount = 0;
        int revCount = 0;

        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }

        if (forCount + revCount < minObservations)
            continue;

        int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));

        // We need to calculate the confidence range for the from reads and work
        // out the most pessimistic value we could take as a starting value
        WilsonScoreInterval wi = new WilsonScoreInterval();
        ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);
        //         System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100);         

        reads = toStore.getReadsForProbe(probes[p]);

        forCount = 0;
        revCount = 0;

        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }

        if (forCount + revCount < minObservations)
            continue;

        float toPercent = (forCount * 100f) / (forCount + revCount);

        //         System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]);

        // Find the most pessimistic fromPercent such that the expected toPercent is as close
        // to the observed value based on the confidence interval we calculated before.

        double worseCaseExpectedPercent = 0;
        double smallestTheoreticalToActualDiff = 100;

        // Just taking the abs diff can still leave us with a closest value which is still
        // quite far from where we are.  We therefore also check if our confidence interval
        // gives us a potential value range which spans the actual value, and if it does we
        // fail it without even running the test.
        boolean seenLower = false;
        boolean seenHigher = false;

        for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math
                .min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
            double expectedPercent = expectedEnds[m];
            double diff = expectedPercent - toPercent;
            if (diff <= 0)
                seenLower = true;
            if (diff >= 0)
                seenHigher = true;

            if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
                worseCaseExpectedPercent = expectedPercent;
                smallestTheoreticalToActualDiff = Math.abs(diff);
            }
        }

        //         System.err.println("Worst case percent is "+worseCaseExpectedPercent+" with diff of "+smallestTheoreticalToActualDiff+" to "+toPercent);   

        // Sanity check
        if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
            throw new IllegalStateException("Can't have a worst case which is better than the actual");
        }

        if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
            continue;

        // Check the directionality
        if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
            continue;
        if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
            continue;

        // Now perform the Binomial test.

        double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d,
                hypothesis);

        if (seenLower && seenHigher)
            pValue = 0.5; // Our confidence range spanned the actual value we had so we can't be significant

        //         System.err.println("P value is "+pValue);

        // Store this as a potential hit (after correcting p-values later)
        hits.add(new ProbeTTestValue(probes[p], pValue));

    }

    // Now we can correct the p-values if we need to

    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);

    if (applyMultipleTestingCorrection) {

        //         System.err.println("Correcting for "+rawHits.length+" tests");
        BenjHochFDR.calculateQValues(rawHits);
    }

    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }

    filterFinished(newList);

}
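
The filter above selects the AlternativeHypothesis from the direction chosen in the dialog and then tests each probe's forward-strand count against a worst-case expected proportion. A condensed sketch of that per-probe call, with made-up counts, a made-up expected percentage, and a plain string flag standing in for the "Above"/"Below" options:

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

public class DirectionalProbeTest {
    public static void main(String[] args) {
        // Made-up per-probe counts and worst-case expected percentage.
        int forCount = 180;
        int revCount = 60;
        double worstCaseExpectedPercent = 55.0;

        // Direction flag standing in for the "Above"/"Below" selection in the options dialog.
        String direction = "Above";
        AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
        if ("Above".equals(direction)) {
            hypothesis = AlternativeHypothesis.GREATER_THAN;
        } else if ("Below".equals(direction)) {
            hypothesis = AlternativeHypothesis.LESS_THAN;
        }

        // Same call shape as above: trials, successes, expected proportion, chosen hypothesis.
        double pValue = new BinomialTest().binomialTest(forCount + revCount, forCount,
                worstCaseExpectedPercent / 100d, hypothesis);
        System.out.println("per-probe p-value = " + pValue);
    }
}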