List of usage examples for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM
TiesStrategy MAXIMUM
To view the source code for org.apache.commons.math3.stat.ranking TiesStrategy MAXIMUM, click Source Link.
From source file:ch.unil.genescore.vegas.GeneDataFakePhenotype.java
private void addSignal(double[] fakePheno) { for (int i = 0; i < fakePheno.length; i++) { fakePheno[i] = fakePheno[i] + fakeSignal_[i]; }/*from w ww . jav a 2s .c o m*/ NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL, TiesStrategy.MAXIMUM); double[] ranks = ranking.rank(fakePheno); for (int i = 0; i < fakePheno.length; i++) { //fakePheno[i]=myNormal.inverseCumulativeProbability(ranks[i]/(ranks.length+1)); fakePheno[i] = DistributionMethods.normalInverseCumulativeProbability(ranks[i] / (ranks.length + 1)); } }
From source file:com.itemanalysis.jmetrik.stats.ranking.RankingAnalysis.java
/** * Adds columns to database for storing ranks, ntiles, and normal scores. * A prefix is added to the variable name to indicate the type of variable. * * These prefixes are used to update the proper variables in the database (see compute()). * * @throws SQLException/*w w w . ja va 2s . co m*/ * @throws IllegalArgumentException */ // public void addColumnsToDb()throws SQLException, IllegalArgumentException{ // // int numberOfColumns = dao.getColumnCount(conn, tableName); // int columnNumber = numberOfColumns+1; // // String newVariableLabel = "Rank"; // if(blom) newVariableLabel = "Blom Normal Score"; // if(tukey) newVariableLabel = "Tukey Normal Score"; // if(vdw) newVariableLabel = "van der Waerden Normal Score"; // if(ntiles) newVariableLabel = "Quantiles: " + numGroups + " groups"; // // newVariable = new VariableInfo(newVariableName, newVariableLabel, VariableType.NOT_ITEM, VariableType.DOUBLE, columnNumber++, ""); // dao.addColumnToDb(conn, tableName, newVariable); // // } protected String doInBackground() { sw = new StopWatch(); this.firePropertyChange("status", "", "Running Ranking..."); this.firePropertyChange("progress-on", null, null); String results = ""; try { //get variable info from db tableName = new DataTableName(command.getPairedOptionList("data").getStringAt("table")); VariableTableName variableTableName = new VariableTableName(tableName.toString()); String selectVariable = command.getFreeOption("variable").getString(); variable = dao.getVariableAttributes(conn, variableTableName, selectVariable); newVariableName = command.getFreeOption("name").getString(); initializeProgress(); String ties = command.getSelectOneOption("ties").getSelectedArgument(); if (ties.equals("sequential")) { tiesStrategy = TiesStrategy.SEQUENTIAL; } else if (ties.equals("min")) { tiesStrategy = TiesStrategy.MINIMUM; } else if (ties.equals("max")) { tiesStrategy = TiesStrategy.MAXIMUM; } else if (ties.equals("average")) { tiesStrategy = TiesStrategy.AVERAGE; } else if 
(ties.equals("random")) { tiesStrategy = TiesStrategy.RANDOM; } String type = command.getSelectOneOption("type").getSelectedArgument(); if ("blom".equals(type)) { blom = true; } else if ("tukey".equals(type)) { tukey = true; } else if ("vdw".equals(type)) { vdw = true; } else if ("ntiles".equals(type)) { ntiles = true; if (command.getFreeOption("ntiles").hasValue()) { numGroups = command.getFreeOption("ntiles").getInteger(); } else { rank = true; } } else { rank = true; } if (blom || tukey || vdw) normScore = new NormalScores(); ascending = command.getSelectOneOption("order").isValueSelected("asc"); // addColumnsToDb(); results = compute(); firePropertyChange("status", "", "Done: " + sw.getElapsedTime()); firePropertyChange("progress-off", null, null); //make statusbar progress not visible } catch (Throwable t) { logger.fatal(t.getMessage(), t); theException = t; } return results; }
From source file:nl.systemsgenetics.genenetworkbackend.hpo.TestDiseaseGenePerformance.java
/** * @param args the command line arguments * @throws java.lang.Exception/*from w ww .j a v a 2s . co m*/ */ public static void main(String[] args) throws Exception { final File diseaseGeneHpoFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt"); final File ncbiToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgNcbiId.txt"); final File hgncToEnsgMapFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt"); final File ensgSymbolMappingFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\ensgHgnc.txt"); final File predictionMatrixFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_zscores.txt.gz"); final File predictionMatrixCorrelationFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_pathwayCorrelation.txt"); final File significantTermsFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_bonSigTerms.txt"); final double correctedPCutoff = 0.05; final File hpoOboFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\HPO\\135\\hp.obo"); final File hpoPredictionInfoFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\hpo_predictions_auc_bonferroni.txt"); final File hposToExcludeFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoToExclude.txt"); final File skewnessFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\predictions\\skewnessSummary.txt"); final boolean randomize = true; final File annotationMatrixFile = new File( 
"C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\ALL_SOURCES_ALL_FREQUENCIES_phenotype_to_genes.txt_matrix.txt.gz"); final File backgroundForRandomize = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\Data31995Genes05-12-2017\\PCA_01_02_2018\\PathwayMatrix\\Ensembl2Reactome_All_Levels.txt_genesInPathways.txt"); //final File backgroundForRandomize = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\expressedReactomeGenes.txt"); final boolean randomizeCustomBackground = true; Map<String, String> ensgSymbolMapping = loadEnsgToHgnc(ensgSymbolMappingFile); final File outputFile; final ArrayList<String> backgroundGenes; if (randomize) { if (randomizeCustomBackground) { System.err.println("First need to fix so ranking list contains all genes in background list"); return; // backgroundGenes = loadBackgroundGenes(backgroundForRandomize); // outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedCustomBackground.txt"); } else { backgroundGenes = null; outputFile = new File( "C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkRandomizedExtraNorm.txt"); } } else { backgroundGenes = null; outputFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\hpoDiseaseBenchmarkExtraNorm.txt"); } final HashMap<String, ArrayList<String>> ncbiToEnsgMap = loadNcbiToEnsgMap(ncbiToEnsgMapFile); final HashMap<String, ArrayList<String>> hgncToEnsgMap = loadHgncToEnsgMap(hgncToEnsgMapFile); final HashSet<String> exludedHpo = loadHpoExclude(hposToExcludeFile); final SkewnessInfo skewnessInfo = new SkewnessInfo(skewnessFile); LinkedHashSet<String> significantTerms = loadSignificantTerms(significantTermsFile); DoubleMatrixDataset<String, String> predictionMatrix = DoubleMatrixDataset .loadDoubleData(predictionMatrixFile.getAbsolutePath()); DoubleMatrixDataset<String, String> predictionMatrixSignificant = predictionMatrix .viewColSelection(significantTerms); DoubleMatrixDataset<String, 
String> predictionMatrixSignificantCorrelationMatrix = DoubleMatrixDataset .loadDoubleData(predictionMatrixCorrelationFile.getAbsolutePath()); DiseaseGeneHpoData diseaseGeneHpoData = new DiseaseGeneHpoData(diseaseGeneHpoFile, ncbiToEnsgMap, hgncToEnsgMap, exludedHpo, new HashSet(predictionMatrix.getHashRows().keySet()), "OMIM"); //NOTE if one would use a differnt background this needs to be updated HashSet<String> diseaseGenes = new HashSet<>(diseaseGeneHpoData.getDiseaseGenes()); if (randomize) { diseaseGeneHpoData = diseaseGeneHpoData.getPermutation(1, backgroundGenes); } for (String gene : diseaseGenes) { if (!predictionMatrixSignificant.containsRow(gene)) { throw new Exception("Error: " + gene); } } int[] mapGeneIndexToDiseaseGeneIndex = new int[predictionMatrix.rows()]; ArrayList<String> predictedGenes = predictionMatrix.getRowObjects(); int g2 = 0; for (int g = 0; g < predictedGenes.size(); ++g) { mapGeneIndexToDiseaseGeneIndex[g] = diseaseGenes.contains(predictedGenes.get(g)) ? g2++ : -1; } DoubleMatrixDataset<String, String> annotationnMatrix = DoubleMatrixDataset .loadDoubleData(annotationMatrixFile.getAbsolutePath()); DoubleMatrixDataset<String, String> annotationMatrixSignificant = annotationnMatrix .viewColSelection(significantTerms); HashMap<String, MeanSd> hpoMeanSds = calculatePathayMeansOfAnnotatedGenes(predictionMatrixSignificant, annotationMatrixSignificant); Map<String, PredictionInfo> predictionInfo = HpoFinder.loadPredictionInfo(hpoPredictionInfoFile); Ontology hpoOntology = HpoFinder.loadHpoOntology(hpoOboFile); HpoFinder hpoFinder = new HpoFinder(hpoOntology, predictionInfo); final int totalGenes = predictionMatrixSignificant.rows(); final int totalDiseaseGenes = diseaseGenes.size(); final double[] geneScores = new double[totalGenes]; final double[] geneScoresDiseaseGenes = new double[totalDiseaseGenes]; final NaturalRanking naturalRanking = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.MAXIMUM); CSVWriter writer = new CSVWriter(new 
FileWriter(outputFile), '\t', '\0', '\0', "\n"); String[] outputLine = new String[16]; int c = 0; outputLine[c++] = "Disease"; outputLine[c++] = "Gene"; outputLine[c++] = "Hgnc"; outputLine[c++] = "Rank"; outputLine[c++] = "RankAmongDiseaseGenes"; outputLine[c++] = "Z-score"; outputLine[c++] = "HPO_skewness"; outputLine[c++] = "Other_mean_skewness"; outputLine[c++] = "Other_max_skewness"; outputLine[c++] = "HPO_phenotypic_match_score"; outputLine[c++] = "HPO_count"; outputLine[c++] = "HPO_sum_auc"; outputLine[c++] = "HPO_mean_auc"; outputLine[c++] = "HPO_median_auc"; outputLine[c++] = "HPO_terms"; outputLine[c++] = "HPO_terms_match_score"; writer.writeNext(outputLine); Random random = new Random(1); Mean meanCalculator = new Mean(); Median medianCalculator = new Median(); for (DiseaseGeneHpoData.DiseaseGene diseaseGene : diseaseGeneHpoData.getDiseaseGeneHpos()) { String gene = diseaseGene.getGene(); String disease = diseaseGene.getDisease(); if (!predictionMatrixSignificant.containsRow(gene)) { continue; } Set<String> geneHpos = diseaseGeneHpoData.getDiseaseEnsgHpos(diseaseGene); LinkedHashSet<String> geneHposPredictable = new LinkedHashSet<>(); for (String hpo : geneHpos) { geneHposPredictable .addAll(hpoFinder.getTermsToNames(hpoFinder.getPredictableTerms(hpo, correctedPCutoff))); } if (geneHposPredictable.isEmpty()) { continue; } // if(geneHposPredictable.size() > 1){ // String hpoSelected = geneHposPredictable.toArray(new String[geneHposPredictable.size()])[random.nextInt(geneHposPredictable.size())]; // geneHposPredictable = new LinkedHashSet<>(1); // geneHposPredictable.add(hpoSelected); // } DoubleMatrixDataset<String, String> predictionCaseTerms = predictionMatrixSignificant .viewColSelection(geneHposPredictable); DoubleMatrix2D predictionCaseTermsMatrix = predictionCaseTerms.getMatrix(); double denominator = Math.sqrt(geneHposPredictable.size()); for (int g = 0; g < totalGenes; ++g) { geneScores[g] = predictionCaseTermsMatrix.viewRow(g).zSum() / 
denominator; if (Double.isNaN(geneScores[g])) { geneScores[g] = 0; } g2 = mapGeneIndexToDiseaseGeneIndex[g]; if (g2 >= 0) { geneScoresDiseaseGenes[g2] = geneScores[g]; } } double[] geneRanks = naturalRanking.rank(geneScores); int diseaseGeneIndex = predictionMatrixSignificant.getRowIndex(gene); double[] geneRanksDiseaseGenes = naturalRanking.rank(geneScoresDiseaseGenes); int diseaseGeneIndexInDiseaseGenesOnly = mapGeneIndexToDiseaseGeneIndex[diseaseGeneIndex]; double zscore = geneScores[diseaseGeneIndex]; double rank = (totalGenes - geneRanks[diseaseGeneIndex]) + 1; double rankAmongDiseaseGenes = (totalDiseaseGenes - geneRanksDiseaseGenes[diseaseGeneIndexInDiseaseGenesOnly]) + 1; double hpoPhenotypicMatchScore = 0; StringBuilder individualMatchScore = new StringBuilder(); boolean notFirst = false; int usedHpos = 0; double[] aucs = new double[geneHposPredictable.size()]; double sumAucs = 0; int i = 0; for (String hpo : geneHposPredictable) { usedHpos++; MeanSd hpoMeanSd = hpoMeanSds.get(hpo); double hpoPredictionZ = predictionMatrixSignificant.getElement(gene, hpo); double hpoPredictionOutlierScore = ((hpoPredictionZ - hpoMeanSd.getMean()) / hpoMeanSd.getSd()); if (notFirst) { individualMatchScore.append(';'); } notFirst = true; individualMatchScore.append(hpoPredictionOutlierScore); hpoPhenotypicMatchScore += hpoPredictionOutlierScore; aucs[i++] = predictionInfo.get(hpo).getAuc(); sumAucs += predictionInfo.get(hpo).getAuc(); } double meanAuc = meanCalculator.evaluate(aucs); double medianAuc = medianCalculator.evaluate(aucs); if (usedHpos == 0) { hpoPhenotypicMatchScore = Double.NaN; } else { hpoPhenotypicMatchScore = hpoPhenotypicMatchScore / usedHpos; } String symbol = ensgSymbolMapping.get(gene); if (symbol == null) { symbol = ""; } c = 0; outputLine[c++] = disease; outputLine[c++] = gene; outputLine[c++] = symbol; outputLine[c++] = String.valueOf(rank); outputLine[c++] = String.valueOf(rankAmongDiseaseGenes); outputLine[c++] = String.valueOf(zscore); 
outputLine[c++] = String.valueOf(skewnessInfo.getHpoSkewness(gene)); outputLine[c++] = String.valueOf(skewnessInfo.getMeanSkewnessExHpo(gene)); outputLine[c++] = String.valueOf(skewnessInfo.getMaxSkewnessExHpo(gene)); outputLine[c++] = String.valueOf(hpoPhenotypicMatchScore); outputLine[c++] = String.valueOf(geneHposPredictable.size()); outputLine[c++] = String.valueOf(sumAucs); outputLine[c++] = String.valueOf(meanAuc); outputLine[c++] = String.valueOf(medianAuc); outputLine[c++] = String.join(";", geneHposPredictable); outputLine[c++] = individualMatchScore.toString(); writer.writeNext(outputLine); } writer.close(); }