Example usage for org.apache.commons.math3.stat.ranking NaturalRanking NaturalRanking

List of usage examples for org.apache.commons.math3.stat.ranking NaturalRanking NaturalRanking

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat.ranking NaturalRanking NaturalRanking.

Prototype

public NaturalRanking(NaNStrategy nanStrategy, RandomGenerator randomGenerator) 

Source Link

Document

Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM and the given source of random data.

Usage

From source file:gedi.util.math.stat.testing.WilcoxonUnpaired.java

/**
 * Create a test instance where NaN's are left in place and ties get
 * the average of applicable ranks. Use this unless you are very sure
 * of what you are doing.//from   w w w. java2  s .  co  m
 */
public WilcoxonUnpaired() {
    naturalRanking = new NaturalRanking(NaNStrategy.FIXED, TiesStrategy.AVERAGE);
}

From source file:gedi.util.math.stat.testing.WilcoxonUnpaired.java

/**
 * Create a test instance using the given strategies for NaN's and ties.
 * Only use this if you are sure of what you are doing.
 *
 * @param nanStrategy/*from  w  ww .  ja va2s.  co  m*/
 *            specifies the strategy that should be used for Double.NaN's
 * @param tiesStrategy
 *            specifies the strategy that should be used for ties
 */
public WilcoxonUnpaired(final NaNStrategy nanStrategy, final TiesStrategy tiesStrategy) {
    naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
}

From source file:MannWhitneyUTest.java

/**
 * Create a test instance using where NaN's are left in place and ties get
 * the average of applicable ranks. Use this unless you are very sure of
 * what you are doing.//from w w w  .j  a v a2  s. c  om
 */
public MannWhitneyUTest() {
    System.out.println("1");
    naturalRanking = new NaturalRanking(NaNStrategy.FIXED, TiesStrategy.AVERAGE);
}

From source file:MannWhitneyUTest.java

/**
 * Create a test instance using the given strategies for NaN's and ties.
 * Only use this if you are sure of what you are doing.
 *
 * @param nanStrategy/*from   w  ww.j a va 2s  .c  o  m*/
 *            specifies the strategy that should be used for Double.NaN's
 * @param tiesStrategy
 *            specifies the strategy that should be used for ties
 */
public MannWhitneyUTest(final NaNStrategy nanStrategy, final TiesStrategy tiesStrategy) {
    System.out.println("2");
    naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
}

From source file:ch.unil.genescore.vegas.GeneDataFakePhenotype.java

private void addSignal(double[] fakePheno) {

    for (int i = 0; i < fakePheno.length; i++) {
        fakePheno[i] = fakePheno[i] + fakeSignal_[i];
    }/*from   w  ww  .ja  v  a2s  . c o m*/
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL, TiesStrategy.MAXIMUM);
    double[] ranks = ranking.rank(fakePheno);
    for (int i = 0; i < fakePheno.length; i++) {
        //fakePheno[i]=myNormal.inverseCumulativeProbability(ranks[i]/(ranks.length+1));         
        fakePheno[i] = DistributionMethods.normalInverseCumulativeProbability(ranks[i] / (ranks.length + 1));

    }
}

From source file:com.itemanalysis.jmetrik.stats.ranking.RankingAnalysis.java

public String compute() throws SQLException {
    Statement stmt = null;//from   w w  w  . j  a  va2s. c o  m
    ResultSet rs = null;

    try {
        //get data
        ResizableDoubleArray data = getData();

        //create columns - dao uses its own transaction
        int numberOfColumns = dao.getColumnCount(conn, tableName);
        int columnNumber = numberOfColumns + 1;
        String newVariableLabel = "Rank";
        if (blom)
            newVariableLabel = "Blom Normal Score";
        if (tukey)
            newVariableLabel = "Tukey Normal Score";
        if (vdw)
            newVariableLabel = "van der Waerden Normal Score";
        if (ntiles)
            newVariableLabel = "Quantiles: " + numGroups + " groups";
        newVariable = new VariableAttributes(newVariableName, newVariableLabel, ItemType.NOT_ITEM,
                DataType.DOUBLE, columnNumber++, "");

        dao.addColumnToDb(conn, tableName, newVariable);

        //compute ranks
        NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED, tiesStrategy);
        double[] ranks = ranking.rank(data.getElements());

        //begin transaction
        conn.setAutoCommit(false);

        //connect to table and update values
        SelectQuery select = new SelectQuery();
        Table sqlTable = new Table(tableName.getNameForDatabase());
        select.addColumn(sqlTable, newVariable.getName().nameForDatabase());
        stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_UPDATABLE);
        rs = stmt.executeQuery(select.toString());

        int nRanks = ranks.length;
        int rankIndex = 0;//array index for ranks (missing values not included)
        int dbIndex = 0;//db row position for case (missing values included)
        double r = Double.NaN;
        boolean missing = false;
        String tempName = "";
        while (rs.next()) {
            missing = missingIndex.contains(dbIndex);
            if (missing) {
                rs.updateNull(newVariable.getName().nameForDatabase());
            } else {
                r = ranks[rankIndex];
                if (blom) {
                    rs.updateDouble(newVariable.getName().nameForDatabase(),
                            normScore.blom(r, (double) nRanks));
                } else if (tukey) {
                    rs.updateDouble(newVariable.getName().nameForDatabase(),
                            normScore.tukey(r, (double) nRanks));
                } else if (vdw) {
                    rs.updateDouble(newVariable.getName().nameForDatabase(),
                            normScore.vanderWaerden(r, (double) nRanks));
                } else if (ntiles) {
                    rs.updateDouble(newVariable.getName().nameForDatabase(),
                            getGroup(r, (double) nRanks, (double) numGroups));
                } else {
                    rs.updateDouble(newVariable.getName().nameForDatabase(), r);
                }
                rankIndex++;
            }
            rs.updateRow();
            updateProgress();
            dbIndex++;
        }
        conn.commit();
        return "Ranks computed";
    } catch (SQLException ex) {
        conn.rollback();
        throw ex;
    } finally {
        if (rs != null)
            rs.close();
        if (stmt != null)
            stmt.close();
        conn.setAutoCommit(true);
    }

}

From source file:nl.systemsgenetics.genenetworkbackend.hpo.TestDiseaseGenePerformance.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception/*from   w ww.  j  a  v  a2  s. c om*/
 */
public static void main(String[] args) throws Exception {

    final File diseaseGeneHpoFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt");
    final File ncbiToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgNcbiId.txt");
    final File hgncToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt");
    final File ensgSymbolMappingFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt");
    final File predictionMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_zscores.txt.gz");
    final File predictionMatrixCorrelationFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_pathwayCorrelation.txt");
    final File significantTermsFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_bonSigTerms.txt");
    final double correctedPCutoff = 0.05;
    final File hpoOboFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\hp.obo");
    final File hpoPredictionInfoFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_auc_bonferroni.txt");
    final File hposToExcludeFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoToExclude.txt");
    final File skewnessFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\skewnessSummary.txt");
    final boolean randomize = true;
    final File annotationMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\ALL_SOURCES_ALL_FREQUENCIES_phenotype_to_genes.txt_matrix.txt.gz");
    final File backgroundForRandomize = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\Ensembl2Reactome_All_Levels.txt_genesInPathways.txt");
    //final File backgroundForRandomize = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\expressedReactomeGenes.txt");
    final boolean randomizeCustomBackground = true;

    Map<String, String> ensgSymbolMapping = loadEnsgToHgnc(ensgSymbolMappingFile);

    final File outputFile;
    final ArrayList<String> backgroundGenes;
    if (randomize) {

        if (randomizeCustomBackground) {
            System.err.println("First need to fix so ranking list contains all genes in background list");
            return;
            //            backgroundGenes = loadBackgroundGenes(backgroundForRandomize);
            //            outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedCustomBackground.txt");
        } else {
            backgroundGenes = null;
            outputFile = new File(
                    "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedExtraNorm.txt");
        }

    } else {
        backgroundGenes = null;
        outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkExtraNorm.txt");
    }

    final HashMap<String, ArrayList<String>> ncbiToEnsgMap = loadNcbiToEnsgMap(ncbiToEnsgMapFile);
    final HashMap<String, ArrayList<String>> hgncToEnsgMap = loadHgncToEnsgMap(hgncToEnsgMapFile);
    final HashSet<String> exludedHpo = loadHpoExclude(hposToExcludeFile);

    final SkewnessInfo skewnessInfo = new SkewnessInfo(skewnessFile);

    LinkedHashSet<String> significantTerms = loadSignificantTerms(significantTermsFile);

    DoubleMatrixDataset<String, String> predictionMatrix = DoubleMatrixDataset
            .loadDoubleData(predictionMatrixFile.getAbsolutePath());
    DoubleMatrixDataset<String, String> predictionMatrixSignificant = predictionMatrix
            .viewColSelection(significantTerms);

    DoubleMatrixDataset<String, String> predictionMatrixSignificantCorrelationMatrix = DoubleMatrixDataset
            .loadDoubleData(predictionMatrixCorrelationFile.getAbsolutePath());

    DiseaseGeneHpoData diseaseGeneHpoData = new DiseaseGeneHpoData(diseaseGeneHpoFile, ncbiToEnsgMap,
            hgncToEnsgMap, exludedHpo, new HashSet(predictionMatrix.getHashRows().keySet()), "OMIM");

    //NOTE if one would use a differnt background this needs to be updated
    HashSet<String> diseaseGenes = new HashSet<>(diseaseGeneHpoData.getDiseaseGenes());

    if (randomize) {
        diseaseGeneHpoData = diseaseGeneHpoData.getPermutation(1, backgroundGenes);
    }

    for (String gene : diseaseGenes) {
        if (!predictionMatrixSignificant.containsRow(gene)) {
            throw new Exception("Error: " + gene);
        }
    }

    int[] mapGeneIndexToDiseaseGeneIndex = new int[predictionMatrix.rows()];
    ArrayList<String> predictedGenes = predictionMatrix.getRowObjects();

    int g2 = 0;
    for (int g = 0; g < predictedGenes.size(); ++g) {
        mapGeneIndexToDiseaseGeneIndex[g] = diseaseGenes.contains(predictedGenes.get(g)) ? g2++ : -1;
    }

    DoubleMatrixDataset<String, String> annotationnMatrix = DoubleMatrixDataset
            .loadDoubleData(annotationMatrixFile.getAbsolutePath());
    DoubleMatrixDataset<String, String> annotationMatrixSignificant = annotationnMatrix
            .viewColSelection(significantTerms);

    HashMap<String, MeanSd> hpoMeanSds = calculatePathayMeansOfAnnotatedGenes(predictionMatrixSignificant,
            annotationMatrixSignificant);

    Map<String, PredictionInfo> predictionInfo = HpoFinder.loadPredictionInfo(hpoPredictionInfoFile);

    Ontology hpoOntology = HpoFinder.loadHpoOntology(hpoOboFile);

    HpoFinder hpoFinder = new HpoFinder(hpoOntology, predictionInfo);

    final int totalGenes = predictionMatrixSignificant.rows();
    final int totalDiseaseGenes = diseaseGenes.size();
    final double[] geneScores = new double[totalGenes];
    final double[] geneScoresDiseaseGenes = new double[totalDiseaseGenes];
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.MAXIMUM);

    CSVWriter writer = new CSVWriter(new FileWriter(outputFile), '\t', '\0', '\0', "\n");

    String[] outputLine = new String[16];
    int c = 0;
    outputLine[c++] = "Disease";
    outputLine[c++] = "Gene";
    outputLine[c++] = "Hgnc";
    outputLine[c++] = "Rank";
    outputLine[c++] = "RankAmongDiseaseGenes";
    outputLine[c++] = "Z-score";
    outputLine[c++] = "HPO_skewness";
    outputLine[c++] = "Other_mean_skewness";
    outputLine[c++] = "Other_max_skewness";
    outputLine[c++] = "HPO_phenotypic_match_score";
    outputLine[c++] = "HPO_count";
    outputLine[c++] = "HPO_sum_auc";
    outputLine[c++] = "HPO_mean_auc";
    outputLine[c++] = "HPO_median_auc";
    outputLine[c++] = "HPO_terms";
    outputLine[c++] = "HPO_terms_match_score";
    writer.writeNext(outputLine);

    Random random = new Random(1);

    Mean meanCalculator = new Mean();
    Median medianCalculator = new Median();

    for (DiseaseGeneHpoData.DiseaseGene diseaseGene : diseaseGeneHpoData.getDiseaseGeneHpos()) {

        String gene = diseaseGene.getGene();
        String disease = diseaseGene.getDisease();

        if (!predictionMatrixSignificant.containsRow(gene)) {
            continue;
        }

        Set<String> geneHpos = diseaseGeneHpoData.getDiseaseEnsgHpos(diseaseGene);

        LinkedHashSet<String> geneHposPredictable = new LinkedHashSet<>();

        for (String hpo : geneHpos) {
            geneHposPredictable
                    .addAll(hpoFinder.getTermsToNames(hpoFinder.getPredictableTerms(hpo, correctedPCutoff)));
        }

        if (geneHposPredictable.isEmpty()) {
            continue;
        }

        //         if(geneHposPredictable.size() > 1){
        //            String hpoSelected = geneHposPredictable.toArray(new String[geneHposPredictable.size()])[random.nextInt(geneHposPredictable.size())];
        //            geneHposPredictable = new LinkedHashSet<>(1);
        //            geneHposPredictable.add(hpoSelected);
        //         }
        DoubleMatrixDataset<String, String> predictionCaseTerms = predictionMatrixSignificant
                .viewColSelection(geneHposPredictable);
        DoubleMatrix2D predictionCaseTermsMatrix = predictionCaseTerms.getMatrix();

        double denominator = Math.sqrt(geneHposPredictable.size());

        for (int g = 0; g < totalGenes; ++g) {
            geneScores[g] = predictionCaseTermsMatrix.viewRow(g).zSum() / denominator;
            if (Double.isNaN(geneScores[g])) {
                geneScores[g] = 0;
            }

            g2 = mapGeneIndexToDiseaseGeneIndex[g];
            if (g2 >= 0) {
                geneScoresDiseaseGenes[g2] = geneScores[g];
            }

        }

        double[] geneRanks = naturalRanking.rank(geneScores);
        int diseaseGeneIndex = predictionMatrixSignificant.getRowIndex(gene);

        double[] geneRanksDiseaseGenes = naturalRanking.rank(geneScoresDiseaseGenes);
        int diseaseGeneIndexInDiseaseGenesOnly = mapGeneIndexToDiseaseGeneIndex[diseaseGeneIndex];

        double zscore = geneScores[diseaseGeneIndex];
        double rank = (totalGenes - geneRanks[diseaseGeneIndex]) + 1;
        double rankAmongDiseaseGenes = (totalDiseaseGenes
                - geneRanksDiseaseGenes[diseaseGeneIndexInDiseaseGenesOnly]) + 1;

        double hpoPhenotypicMatchScore = 0;
        StringBuilder individualMatchScore = new StringBuilder();
        boolean notFirst = false;
        int usedHpos = 0;

        double[] aucs = new double[geneHposPredictable.size()];
        double sumAucs = 0;

        int i = 0;
        for (String hpo : geneHposPredictable) {

            usedHpos++;

            MeanSd hpoMeanSd = hpoMeanSds.get(hpo);

            double hpoPredictionZ = predictionMatrixSignificant.getElement(gene, hpo);

            double hpoPredictionOutlierScore = ((hpoPredictionZ - hpoMeanSd.getMean()) / hpoMeanSd.getSd());

            if (notFirst) {
                individualMatchScore.append(';');
            }
            notFirst = true;

            individualMatchScore.append(hpoPredictionOutlierScore);

            hpoPhenotypicMatchScore += hpoPredictionOutlierScore;

            aucs[i++] = predictionInfo.get(hpo).getAuc();
            sumAucs += predictionInfo.get(hpo).getAuc();

        }

        double meanAuc = meanCalculator.evaluate(aucs);
        double medianAuc = medianCalculator.evaluate(aucs);

        if (usedHpos == 0) {
            hpoPhenotypicMatchScore = Double.NaN;
        } else {
            hpoPhenotypicMatchScore = hpoPhenotypicMatchScore / usedHpos;
        }

        String symbol = ensgSymbolMapping.get(gene);
        if (symbol == null) {
            symbol = "";
        }

        c = 0;
        outputLine[c++] = disease;
        outputLine[c++] = gene;
        outputLine[c++] = symbol;
        outputLine[c++] = String.valueOf(rank);
        outputLine[c++] = String.valueOf(rankAmongDiseaseGenes);
        outputLine[c++] = String.valueOf(zscore);
        outputLine[c++] = String.valueOf(skewnessInfo.getHpoSkewness(gene));
        outputLine[c++] = String.valueOf(skewnessInfo.getMeanSkewnessExHpo(gene));
        outputLine[c++] = String.valueOf(skewnessInfo.getMaxSkewnessExHpo(gene));
        outputLine[c++] = String.valueOf(hpoPhenotypicMatchScore);
        outputLine[c++] = String.valueOf(geneHposPredictable.size());
        outputLine[c++] = String.valueOf(sumAucs);
        outputLine[c++] = String.valueOf(meanAuc);
        outputLine[c++] = String.valueOf(medianAuc);
        outputLine[c++] = String.join(";", geneHposPredictable);
        outputLine[c++] = individualMatchScore.toString();
        writer.writeNext(outputLine);

    }

    writer.close();

}

From source file:org.gitools.analysis.stats.test.MannWhitneyWilcoxonTest.java

public MannWhitneyWilcoxonTest() {
    super("mannWhitneyWilcoxon", GroupComparisonResult.class);
    naturalRanking = new NaturalRanking(NaNStrategy.FIXED, TiesStrategy.AVERAGE);
}

From source file:umcg.genetica.math.stats.MannWhitneyUTest2.java

public MannWhitneyUTest2() {
    naturalRanking = new NaturalRanking(NaNStrategy.FIXED, TiesStrategy.AVERAGE);
}

From source file:umcg.genetica.math.stats.MannWhitneyUTest2.java

/**
 * Create a test instance using the given strategies for NaN's and ties.
 * Only use this if you are sure of what you are doing.
 *
 * @param nanStrategy specifies the strategy that should be used for
 * Double.NaN's//w  w w. j a  v  a  2s. c  om
 * @param tiesStrategy specifies the strategy that should be used for ties
 */
public MannWhitneyUTest2(final NaNStrategy nanStrategy, final TiesStrategy tiesStrategy) {
    naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
}