Example usage for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM

List of usage examples for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM.

Prototype

TiesStrategy MAXIMUM

To view the source code for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM.

Click Source Link

Document

Ties get the maximum applicable rank

Usage

From source file:ch.unil.genescore.vegas.GeneDataFakePhenotype.java

private void addSignal(double[] fakePheno) {

    for (int i = 0; i < fakePheno.length; i++) {
        fakePheno[i] = fakePheno[i] + fakeSignal_[i];
    }/*from w  ww  . jav a  2s  .c  o m*/
    NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL, TiesStrategy.MAXIMUM);
    double[] ranks = ranking.rank(fakePheno);
    for (int i = 0; i < fakePheno.length; i++) {
        //fakePheno[i]=myNormal.inverseCumulativeProbability(ranks[i]/(ranks.length+1));         
        fakePheno[i] = DistributionMethods.normalInverseCumulativeProbability(ranks[i] / (ranks.length + 1));

    }
}

From source file:com.itemanalysis.jmetrik.stats.ranking.RankingAnalysis.java

/**
 * Adds columns to database for storing ranks, ntiles, and normal scores.
 * A prefix is added to the variable name to indicate the type of variable.
 *
 * These prefixes are used to update the proper variables in the database (see compute()).
 *
 * @throws SQLException/*w  w  w .  ja  va  2s  .  co m*/
 * @throws IllegalArgumentException
 */
//    public void addColumnsToDb()throws SQLException, IllegalArgumentException{
//
//        int numberOfColumns = dao.getColumnCount(conn, tableName);
//        int columnNumber = numberOfColumns+1;
//
//        String newVariableLabel = "Rank";
//        if(blom) newVariableLabel = "Blom Normal Score";
//        if(tukey) newVariableLabel = "Tukey Normal Score";
//        if(vdw) newVariableLabel = "van der Waerden Normal Score";
//        if(ntiles) newVariableLabel = "Quantiles: " + numGroups + " groups";
//
//        newVariable = new VariableInfo(newVariableName, newVariableLabel, VariableType.NOT_ITEM, VariableType.DOUBLE, columnNumber++, "");
//        dao.addColumnToDb(conn, tableName, newVariable);
//
//    }

protected String doInBackground() {
    sw = new StopWatch();
    this.firePropertyChange("status", "", "Running Ranking...");
    this.firePropertyChange("progress-on", null, null);
    String results = "";
    try {

        //get variable info from db
        tableName = new DataTableName(command.getPairedOptionList("data").getStringAt("table"));
        VariableTableName variableTableName = new VariableTableName(tableName.toString());
        String selectVariable = command.getFreeOption("variable").getString();
        variable = dao.getVariableAttributes(conn, variableTableName, selectVariable);

        newVariableName = command.getFreeOption("name").getString();

        initializeProgress();

        String ties = command.getSelectOneOption("ties").getSelectedArgument();
        if (ties.equals("sequential")) {
            tiesStrategy = TiesStrategy.SEQUENTIAL;
        } else if (ties.equals("min")) {
            tiesStrategy = TiesStrategy.MINIMUM;
        } else if (ties.equals("max")) {
            tiesStrategy = TiesStrategy.MAXIMUM;
        } else if (ties.equals("average")) {
            tiesStrategy = TiesStrategy.AVERAGE;
        } else if (ties.equals("random")) {
            tiesStrategy = TiesStrategy.RANDOM;
        }

        String type = command.getSelectOneOption("type").getSelectedArgument();
        if ("blom".equals(type)) {
            blom = true;
        } else if ("tukey".equals(type)) {
            tukey = true;
        } else if ("vdw".equals(type)) {
            vdw = true;
        } else if ("ntiles".equals(type)) {
            ntiles = true;
            if (command.getFreeOption("ntiles").hasValue()) {
                numGroups = command.getFreeOption("ntiles").getInteger();
            } else {
                rank = true;
            }
        } else {
            rank = true;
        }

        if (blom || tukey || vdw)
            normScore = new NormalScores();
        ascending = command.getSelectOneOption("order").isValueSelected("asc");

        //            addColumnsToDb();
        results = compute();

        firePropertyChange("status", "", "Done: " + sw.getElapsedTime());
        firePropertyChange("progress-off", null, null); //make statusbar progress not visible

    } catch (Throwable t) {
        logger.fatal(t.getMessage(), t);
        theException = t;
    }
    return results;
}

From source file:nl.systemsgenetics.genenetworkbackend.hpo.TestDiseaseGenePerformance.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception/*from   w ww  .j a v a  2s .  co  m*/
 */
public static void main(String[] args) throws Exception {

    final File diseaseGeneHpoFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt");
    final File ncbiToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgNcbiId.txt");
    final File hgncToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt");
    final File ensgSymbolMappingFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt");
    final File predictionMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_zscores.txt.gz");
    final File predictionMatrixCorrelationFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_pathwayCorrelation.txt");
    final File significantTermsFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_bonSigTerms.txt");
    final double correctedPCutoff = 0.05;
    final File hpoOboFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\hp.obo");
    final File hpoPredictionInfoFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_auc_bonferroni.txt");
    final File hposToExcludeFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoToExclude.txt");
    final File skewnessFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\skewnessSummary.txt");
    final boolean randomize = true;
    final File annotationMatrixFile = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\ALL_SOURCES_ALL_FREQUENCIES_phenotype_to_genes.txt_matrix.txt.gz");
    final File backgroundForRandomize = new File(
            "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\Ensembl2Reactome_All_Levels.txt_genesInPathways.txt");
    //final File backgroundForRandomize = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\expressedReactomeGenes.txt");
    final boolean randomizeCustomBackground = true;

    Map<String, String> ensgSymbolMapping = loadEnsgToHgnc(ensgSymbolMappingFile);

    final File outputFile;
    final ArrayList<String> backgroundGenes;
    if (randomize) {

        if (randomizeCustomBackground) {
            System.err.println("First need to fix so ranking list contains all genes in background list");
            return;
            //            backgroundGenes = loadBackgroundGenes(backgroundForRandomize);
            //            outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedCustomBackground.txt");
        } else {
            backgroundGenes = null;
            outputFile = new File(
                    "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedExtraNorm.txt");
        }

    } else {
        backgroundGenes = null;
        outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkExtraNorm.txt");
    }

    final HashMap<String, ArrayList<String>> ncbiToEnsgMap = loadNcbiToEnsgMap(ncbiToEnsgMapFile);
    final HashMap<String, ArrayList<String>> hgncToEnsgMap = loadHgncToEnsgMap(hgncToEnsgMapFile);
    final HashSet<String> exludedHpo = loadHpoExclude(hposToExcludeFile);

    final SkewnessInfo skewnessInfo = new SkewnessInfo(skewnessFile);

    LinkedHashSet<String> significantTerms = loadSignificantTerms(significantTermsFile);

    DoubleMatrixDataset<String, String> predictionMatrix = DoubleMatrixDataset
            .loadDoubleData(predictionMatrixFile.getAbsolutePath());
    DoubleMatrixDataset<String, String> predictionMatrixSignificant = predictionMatrix
            .viewColSelection(significantTerms);

    DoubleMatrixDataset<String, String> predictionMatrixSignificantCorrelationMatrix = DoubleMatrixDataset
            .loadDoubleData(predictionMatrixCorrelationFile.getAbsolutePath());

    DiseaseGeneHpoData diseaseGeneHpoData = new DiseaseGeneHpoData(diseaseGeneHpoFile, ncbiToEnsgMap,
            hgncToEnsgMap, exludedHpo, new HashSet(predictionMatrix.getHashRows().keySet()), "OMIM");

    //NOTE if one would use a differnt background this needs to be updated
    HashSet<String> diseaseGenes = new HashSet<>(diseaseGeneHpoData.getDiseaseGenes());

    if (randomize) {
        diseaseGeneHpoData = diseaseGeneHpoData.getPermutation(1, backgroundGenes);
    }

    for (String gene : diseaseGenes) {
        if (!predictionMatrixSignificant.containsRow(gene)) {
            throw new Exception("Error: " + gene);
        }
    }

    int[] mapGeneIndexToDiseaseGeneIndex = new int[predictionMatrix.rows()];
    ArrayList<String> predictedGenes = predictionMatrix.getRowObjects();

    int g2 = 0;
    for (int g = 0; g < predictedGenes.size(); ++g) {
        mapGeneIndexToDiseaseGeneIndex[g] = diseaseGenes.contains(predictedGenes.get(g)) ? g2++ : -1;
    }

    DoubleMatrixDataset<String, String> annotationnMatrix = DoubleMatrixDataset
            .loadDoubleData(annotationMatrixFile.getAbsolutePath());
    DoubleMatrixDataset<String, String> annotationMatrixSignificant = annotationnMatrix
            .viewColSelection(significantTerms);

    HashMap<String, MeanSd> hpoMeanSds = calculatePathayMeansOfAnnotatedGenes(predictionMatrixSignificant,
            annotationMatrixSignificant);

    Map<String, PredictionInfo> predictionInfo = HpoFinder.loadPredictionInfo(hpoPredictionInfoFile);

    Ontology hpoOntology = HpoFinder.loadHpoOntology(hpoOboFile);

    HpoFinder hpoFinder = new HpoFinder(hpoOntology, predictionInfo);

    final int totalGenes = predictionMatrixSignificant.rows();
    final int totalDiseaseGenes = diseaseGenes.size();
    final double[] geneScores = new double[totalGenes];
    final double[] geneScoresDiseaseGenes = new double[totalDiseaseGenes];
    final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.MAXIMUM);

    CSVWriter writer = new CSVWriter(new FileWriter(outputFile), '\t', '\0', '\0', "\n");

    String[] outputLine = new String[16];
    int c = 0;
    outputLine[c++] = "Disease";
    outputLine[c++] = "Gene";
    outputLine[c++] = "Hgnc";
    outputLine[c++] = "Rank";
    outputLine[c++] = "RankAmongDiseaseGenes";
    outputLine[c++] = "Z-score";
    outputLine[c++] = "HPO_skewness";
    outputLine[c++] = "Other_mean_skewness";
    outputLine[c++] = "Other_max_skewness";
    outputLine[c++] = "HPO_phenotypic_match_score";
    outputLine[c++] = "HPO_count";
    outputLine[c++] = "HPO_sum_auc";
    outputLine[c++] = "HPO_mean_auc";
    outputLine[c++] = "HPO_median_auc";
    outputLine[c++] = "HPO_terms";
    outputLine[c++] = "HPO_terms_match_score";
    writer.writeNext(outputLine);

    Random random = new Random(1);

    Mean meanCalculator = new Mean();
    Median medianCalculator = new Median();

    for (DiseaseGeneHpoData.DiseaseGene diseaseGene : diseaseGeneHpoData.getDiseaseGeneHpos()) {

        String gene = diseaseGene.getGene();
        String disease = diseaseGene.getDisease();

        if (!predictionMatrixSignificant.containsRow(gene)) {
            continue;
        }

        Set<String> geneHpos = diseaseGeneHpoData.getDiseaseEnsgHpos(diseaseGene);

        LinkedHashSet<String> geneHposPredictable = new LinkedHashSet<>();

        for (String hpo : geneHpos) {
            geneHposPredictable
                    .addAll(hpoFinder.getTermsToNames(hpoFinder.getPredictableTerms(hpo, correctedPCutoff)));
        }

        if (geneHposPredictable.isEmpty()) {
            continue;
        }

        //         if(geneHposPredictable.size() > 1){
        //            String hpoSelected = geneHposPredictable.toArray(new String[geneHposPredictable.size()])[random.nextInt(geneHposPredictable.size())];
        //            geneHposPredictable = new LinkedHashSet<>(1);
        //            geneHposPredictable.add(hpoSelected);
        //         }
        DoubleMatrixDataset<String, String> predictionCaseTerms = predictionMatrixSignificant
                .viewColSelection(geneHposPredictable);
        DoubleMatrix2D predictionCaseTermsMatrix = predictionCaseTerms.getMatrix();

        double denominator = Math.sqrt(geneHposPredictable.size());

        for (int g = 0; g < totalGenes; ++g) {
            geneScores[g] = predictionCaseTermsMatrix.viewRow(g).zSum() / denominator;
            if (Double.isNaN(geneScores[g])) {
                geneScores[g] = 0;
            }

            g2 = mapGeneIndexToDiseaseGeneIndex[g];
            if (g2 >= 0) {
                geneScoresDiseaseGenes[g2] = geneScores[g];
            }

        }

        double[] geneRanks = naturalRanking.rank(geneScores);
        int diseaseGeneIndex = predictionMatrixSignificant.getRowIndex(gene);

        double[] geneRanksDiseaseGenes = naturalRanking.rank(geneScoresDiseaseGenes);
        int diseaseGeneIndexInDiseaseGenesOnly = mapGeneIndexToDiseaseGeneIndex[diseaseGeneIndex];

        double zscore = geneScores[diseaseGeneIndex];
        double rank = (totalGenes - geneRanks[diseaseGeneIndex]) + 1;
        double rankAmongDiseaseGenes = (totalDiseaseGenes
                - geneRanksDiseaseGenes[diseaseGeneIndexInDiseaseGenesOnly]) + 1;

        double hpoPhenotypicMatchScore = 0;
        StringBuilder individualMatchScore = new StringBuilder();
        boolean notFirst = false;
        int usedHpos = 0;

        double[] aucs = new double[geneHposPredictable.size()];
        double sumAucs = 0;

        int i = 0;
        for (String hpo : geneHposPredictable) {

            usedHpos++;

            MeanSd hpoMeanSd = hpoMeanSds.get(hpo);

            double hpoPredictionZ = predictionMatrixSignificant.getElement(gene, hpo);

            double hpoPredictionOutlierScore = ((hpoPredictionZ - hpoMeanSd.getMean()) / hpoMeanSd.getSd());

            if (notFirst) {
                individualMatchScore.append(';');
            }
            notFirst = true;

            individualMatchScore.append(hpoPredictionOutlierScore);

            hpoPhenotypicMatchScore += hpoPredictionOutlierScore;

            aucs[i++] = predictionInfo.get(hpo).getAuc();
            sumAucs += predictionInfo.get(hpo).getAuc();

        }

        double meanAuc = meanCalculator.evaluate(aucs);
        double medianAuc = medianCalculator.evaluate(aucs);

        if (usedHpos == 0) {
            hpoPhenotypicMatchScore = Double.NaN;
        } else {
            hpoPhenotypicMatchScore = hpoPhenotypicMatchScore / usedHpos;
        }

        String symbol = ensgSymbolMapping.get(gene);
        if (symbol == null) {
            symbol = "";
        }

        c = 0;
        outputLine[c++] = disease;
        outputLine[c++] = gene;
        outputLine[c++] = symbol;
        outputLine[c++] = String.valueOf(rank);
        outputLine[c++] = String.valueOf(rankAmongDiseaseGenes);
        outputLine[c++] = String.valueOf(zscore);
        outputLine[c++] = String.valueOf(skewnessInfo.getHpoSkewness(gene));
        outputLine[c++] = String.valueOf(skewnessInfo.getMeanSkewnessExHpo(gene));
        outputLine[c++] = String.valueOf(skewnessInfo.getMaxSkewnessExHpo(gene));
        outputLine[c++] = String.valueOf(hpoPhenotypicMatchScore);
        outputLine[c++] = String.valueOf(geneHposPredictable.size());
        outputLine[c++] = String.valueOf(sumAucs);
        outputLine[c++] = String.valueOf(meanAuc);
        outputLine[c++] = String.valueOf(medianAuc);
        outputLine[c++] = String.join(";", geneHposPredictable);
        outputLine[c++] = individualMatchScore.toString();
        writer.writeNext(outputLine);

    }

    writer.close();

}