List of usage examples for `org.apache.commons.lang.time.StopWatch#reset()`.
Signature: `public void reset()`
Description: Resets the stopwatch to its initial (unstarted) state.
From source file:ubic.BAMSandAllen.optimize.GreedyRemover.java
/** * Iteratively remove rows from the B data matrix of the matrix pair, each time increasing the correlation the * maximum possible/* w ww . j a va 2 s . c om*/ * * @param iterations number of iterations to perform * @param slow indicates whether to re-compute regressions for every gene removal test * @throws Exception */ public void run(int iterations, boolean slow) throws Exception { long startTime = System.currentTimeMillis(); StopWatch watch = new StopWatch(); watch.start(); StopWatch smallWatch = new StopWatch(); int randomChoices = 0; for (int i = 0; i < iterations; i++) { smallWatch.reset(); smallWatch.start(); // find the row that increases the correlation the most double baseCorrelation = pair.getCorrelation(true); log.info("Base correlation:" + baseCorrelation + " size:" + pair.getMatrixBDataRows().size()); // pair.printDimensions(); double bestIncrease = Double.MAX_VALUE * -1; String bestRow = null; int count = 0; if (!slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) { ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false); } List<String> rows = pair.getMatrixBDataRows(); for (String rowName : pair.getMatrixBDataRows()) { // if ( ++count % 5000 == 0 ) log.info( "Data row:" + count + " time:" + watch.getTime() ); double withoutCorrelation = pair.correWithoutMatrixBDataRow(rowName); double diff = withoutCorrelation - baseCorrelation; if (baseCorrelation < 0) { diff *= -1; } if (diff > bestIncrease) { bestRow = rowName; bestIncrease = diff; } } if (!slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) { ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(true); } // now remove it // pair.removeDataRow( bestRow ); //old way if (bestRow == null) { bestRow = pair.getMatrixBDataRows().get((int) Math.random() * rows.size()); log.info("No best row found " + pair.getCorrelation(true)); log.info("Removing any gene results in correlation reduction " + 
pair.getCorrelation(true)); break; // log.info( "Using random row:" + bestRow ); // randomChoices++; } pair.removeMatrixBDataRowFast(bestRow); FileTools.stringToFile(bestRow + "\n", new File(SetupParameters.getDataFolder() + "LOOGenesInOrder." + startTime + ".txt"), true); if (i % 1000 == 0) { // write correlation FileTools.stringToFile(i + "," + baseCorrelation + "\n", new File(SetupParameters.getDataFolder() + "LOOResults." + startTime + ".txt"), true); } log.info(bestRow + " changes correlation by " + bestIncrease + " time:" + (smallWatch.getTime() / 1000) + "s total:" + (watch.getTime() / 1000) + "s"); } log.info("Random choices: " + randomChoices); log.info("Start time:" + startTime); }
From source file:ubic.BAMSandAllen.optimize.GreedyThreadRunner.java
public void run() { StopWatch watch = new StopWatch(); watch.start();// w w w .jav a2 s .c om StopWatch smallWatch = new StopWatch(); smallWatch.reset(); smallWatch.start(); // find the row that increases the correlation the most double baseCorrelation = baseLine; // log.info( "Base correlation:" + baseCorrelation + " size:" + pair.getMatrixBDataRows().size() ); // pair.printDimensions(); bestIncrease = Double.MAX_VALUE * -1; bestRow = null; int count = 0; if (pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) { ConnectivityAndAllenPartialExpressionMatrixPair partialPair = ((ConnectivityAndAllenPartialExpressionMatrixPair) pair); // only do it if we are regressing on expression, ugly if (!slow && partialPair .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.BOTH && partialPair .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.EXPRESSION) { ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false); } } // if ( !slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair ) { // ( ( ConnectivityAndAllenPartialExpressionMatrixPair ) pair ).setRegressionComputeMatrixB( false ); // } for (String rowName : rowsToTest) { // if ( ++count % 5000 == 0 ) log.info( "Data row:" + count + " time:" + watch.getTime() ); double withoutCorrelation = pair.correWithoutMatrixBDataRow(rowName); double diff = withoutCorrelation - baseCorrelation; if (!increase) { diff *= -1; } // TEMP for ranking once // try { // FileTools.stringToFile( rowName + "\t" + diff + "\n", new File( SetupParameters.getDataFolder() // + "firstrun.txt" ), true ); // } catch ( Exception e ) { // e.printStackTrace(); // } // TEMP if (diff > bestIncrease) { bestRow = rowName; bestIncrease = diff; } } // if ( !slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair ) { // ( ( ConnectivityAndAllenPartialExpressionMatrixPair ) pair ).setRegressionComputeMatrixB( true ); // } if 
(pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) { ConnectivityAndAllenPartialExpressionMatrixPair partialPair = ((ConnectivityAndAllenPartialExpressionMatrixPair) pair); // only do it if we are regressing on expression, ugly if (!slow && (partialPair .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.BOTH || partialPair .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.EXPRESSION)) { ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false); } } // log.info( bestRow + " changes correlation by " + bestIncrease + " time:" + ( smallWatch.getTime() / 1000 ) // + "s total:" + ( watch.getTime() / 1000 ) + "s" ); }
From source file:ubic.gemma.analysis.expression.coexpression.Gene2GenePopulationServiceImpl.java
/** * @param expressionExperiments/*w ww .java2 s. c o m*/ * @param toUseGenes * @param stringency * @param analysis if null, no results will be saved to the database, and will be printed to stdout instead. * @return genes that were processed. */ private Collection<Long> doAnalysis(Collection<? extends BioAssaySet> expressionExperiments, Collection<Gene> toUseGenes, int stringency, GeneCoexpressionAnalysis analysis) { int totalLinks = 0; Collection<Long> processedGenes = new HashSet<Long>(); Map<Long, Integer> eeIdOrder = ProbeLinkCoexpressionAnalyzerImpl.getOrderingMap(expressionExperiments); Map<Long, Gene> genesToAnalyzeMap = EntityUtils.getIdMap(toUseGenes); try { StopWatch timer = new StopWatch(); timer.start(); for (Gene queryGene : genesToAnalyzeMap.values()) { log.info("Starting: " + queryGene); totalLinks += processGene(expressionExperiments, genesToAnalyzeMap, analysis, eeIdOrder, processedGenes, stringency, queryGene); if (timer.getTime() > 10 * 60 * 1000) { timer.reset(); timer.start(); log.info("Processed " + processedGenes.size() + "/" + toUseGenes.size() + " genes so far, " + totalLinks + " links in total"); } } completeNodeDegreeComputations(analysis != null); if (analysis != null) { // All done... analysis.setDescription(analysis.getDescription() + "; " + totalLinks + " gene pairs stored."); analysis.setEnabled(true); geneCoexpressionAnalysisService.update(analysis); securityService.makePublic(analysis); } log.info(totalLinks + " gene pairs processed."); } catch (Exception e) { log.error("There was an error during analysis!"); if (analysis != null) { log.error("Cleaning up if possible ..."); geneCoexpressionAnalysisService.delete(analysis); } throw new RuntimeException(e); } return processedGenes; }
From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java
/**
 * Get coexpression results using a pure gene2gene query (without visiting the probe2probe tables). This is generally
 * faster, probably even if we're only interested in data from a subset of the experiments.
 *
 * @param baseSet expression experiment set the stored analysis was run on
 * @param eeIds Experiments to limit the results to (must not be null, and should already be security-filtered)
 * @param queryGenes genes to query; must be non-empty and match the base set's taxon
 * @param stringency minimum number of supporting data sets for a link to be kept
 * @param maxResults cap on results per gene (passed through to the raw query)
 * @param queryGenesOnly return links among the query genes only.
 * @return value object holding per-gene results, summaries and dataset statistics
 */
private CoexpressionMetaValueObject getFilteredCannedAnalysisResults(ExpressionExperimentSet baseSet,
        Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency, int maxResults,
        boolean queryGenesOnly) {
    if (queryGenes.isEmpty()) {
        throw new IllegalArgumentException("No genes in query");
    }
    List<ExpressionExperimentValueObject> eevos = getSortedEEvos(eeIds);
    if (eevos.isEmpty()) {
        throw new IllegalArgumentException("There are no usable experiments in the selected set");
    }
    /*
     * We get this prior to filtering so it matches the vectors stored with the analysis.
     */
    expressionExperimentSetService.thaw(baseSet);
    List<Long> positionToIDMap = Gene2GenePopulationServiceImpl
            .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments()));
    /*
     * This set of links must be filtered to include those in the data sets being analyzed.
     */
    Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults,
            queryGenesOnly);
    List<Long> filteredEeIds = (List<Long>) EntityUtils.getIds(eevos);
    CoexpressionMetaValueObject result = initValueObject(queryGenes, eevos, true);
    List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>();
    Collection<Gene2GeneCoexpression> seen = new HashSet<Gene2GeneCoexpression>();
    // queryGenes = geneService.thawLite( gg2gs.keySet() );
    // populate the value objects.
    StopWatch timer = new StopWatch();
    Collection<Gene> allUsedGenes = new HashSet<Gene>();
    for (Gene queryGene : queryGenes) {
        timer.start();
        if (!queryGene.getTaxon().equals(baseSet.getTaxon())) {
            throw new IllegalArgumentException(
                    "Mismatch between taxon for expression experiment set selected and gene queries");
        }
        allUsedGenes.add(queryGene);
        /*
         * For summary statistics
         */
        CountingMap<Long> supportCount = new CountingMap<Long>();
        Collection<Long> allSupportingDatasets = new HashSet<Long>();
        Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>();
        Collection<Long> allTestedDataSets = new HashSet<Long>();
        int linksMetPositiveStringency = 0;
        int linksMetNegativeStringency = 0;
        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGene.getId());
        assert g2gs != null;
        List<Long> relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs);
        relevantEEIdList.retainAll(filteredEeIds);
        GeneValueObject queryGeneValueObject = new GeneValueObject(queryGene);
        HashMap<Gene, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Gene, Collection<Gene2GeneCoexpression>>();
        // for queryGene get the interactions
        Map<Long, Gene2GeneProteinAssociation> proteinInteractionMap = this
                .getGene2GeneProteinAssociationForQueryGene(queryGene);
        Map<Long, TfGeneAssociation> regulatedBy = this.getTfGeneAssociationsforTargetGene(queryGene);
        Map<Long, TfGeneAssociation> regulates = this.getTfGeneAssociationsforTf(queryGene);
        if (timer.getTime() > 100) {
            log.info("Postprocess " + queryGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();
        for (Gene2GeneCoexpression g2g : g2gs) {
            StopWatch timer2 = new StopWatch();
            timer2.start();
            // The link stores both genes; the "found" gene is whichever one is not the query.
            Gene foundGene = g2g.getFirstGene().equals(queryGene) ? g2g.getSecondGene() : g2g.getFirstGene();
            allUsedGenes.add(foundGene);
            // FIXME Symptom fix for duplicate found genes
            // Keep track of the found genes that we can correctly identify
            // duplicates.
            // All keep the g2g object for debugging purposes.
            if (foundGenes.containsKey(foundGene)) {
                foundGenes.get(foundGene).add(g2g);
                log.warn("Duplicate gene found in coexpression results, skipping: " + foundGene
                        + " From analysis: " + g2g.getSourceAnalysis().getId());
                continue; // Found a duplicate gene, don't add to results
                // just our debugging list
            }
            foundGenes.put(foundGene, new ArrayList<Gene2GeneCoexpression>());
            foundGenes.get(foundGene).add(g2g);
            CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt();
            /*
             * This Thaw is a big time sink and _should not_ be necessary.
             */
            // foundGene = geneService.thawLite( foundGene ); // db hit
            cvo.setQueryGene(queryGeneValueObject);
            cvo.setFoundGene(new GeneValueObject(foundGene));
            if (timer2.getTime() > 10)
                log.info("Coexp. Gene processing phase I:" + timer2.getTime() + "ms");
            timer2.stop();
            timer2.reset();
            timer2.start();
            populateInteractions(proteinInteractionMap, regulatedBy, regulates, foundGene, cvo);
            Collection<Long> testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g,
                    positionToIDMap);
            testingDatasets.retainAll(filteredEeIds);
            /*
             * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there were
             * 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the analyzer.
             */
            Collection<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g,
                    positionToIDMap);
            // necessary in case any were filtered out.
            supportingDatasets.retainAll(filteredEeIds);
            cvo.setSupportingExperiments(supportingDatasets);
            Collection<Long> specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g,
                    positionToIDMap);
            /*
             * Specific probe EEids contains 1 even if the data set wasn't supporting.
             */
            specificDatasets.retainAll(supportingDatasets);
            int numTestingDatasets = testingDatasets.size();
            int numSupportingDatasets = supportingDatasets.size();
            /*
             * SANITY CHECKS
             */
            assert specificDatasets.size() <= numSupportingDatasets;
            assert numTestingDatasets >= numSupportingDatasets;
            assert numTestingDatasets <= eevos.size();
            cvo.setDatasetVector(
                    getDatasetVector(supportingDatasets, testingDatasets, specificDatasets, relevantEEIdList));
            /*
             * This check is necessary in case any data sets were filtered out. (i.e., we're not interested in the
             * full set of data sets that were used in the original analysis.
             */
            if (numSupportingDatasets < stringency) {
                continue;
            }
            allTestedDataSets.addAll(testingDatasets);
            int supportFromSpecificProbes = specificDatasets.size();
            // Negative effect => support counts as negative correlation support; positive otherwise.
            if (g2g.getEffect() < 0) {
                cvo.setPosSupp(0);
                cvo.setNegSupp(numSupportingDatasets);
                if (numSupportingDatasets != supportFromSpecificProbes)
                    cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes);
                ++linksMetNegativeStringency;
            } else {
                cvo.setPosSupp(numSupportingDatasets);
                if (numSupportingDatasets != supportFromSpecificProbes)
                    cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes);
                cvo.setNegSupp(0);
                ++linksMetPositiveStringency;
            }
            cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));
            cvo.setNumTestedIn(numTestingDatasets);
            for (Long id : supportingDatasets) {
                supportCount.increment(id);
            }
            cvo.setSortKey();
            /*
             * This check prevents links from being shown twice when we do "among query genes". We don't skip
             * entirely so we get the counts for the summary table populated correctly.
             */
            if (!seen.contains(g2g)) {
                ecvos.add(cvo);
            }
            seen.add(g2g);
            allSupportingDatasets.addAll(supportingDatasets);
            allDatasetsWithSpecificProbes.addAll(specificDatasets);
        }
        Collection<Long> geneIds = new ArrayList<Long>();
        for (Gene g : allUsedGenes) {
            geneIds.add(g.getId());
        }
        populateNodeDegree(ecvos, geneIds, allTestedDataSets);
        if (timer.getTime() > 1000) {
            log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol() + "Phase II: "
                    + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();
        // This is only necessary for debugging purposes. Helps us keep
        // track of duplicate genes found above.
        if (log.isDebugEnabled()) {
            for (Gene foundGene : foundGenes.keySet()) {
                if (foundGenes.get(foundGene).size() > 1) {
                    log.debug("** DUPLICATE: " + foundGene.getOfficialSymbol()
                            + " found multiple times. Gene2Genes objects are: ");
                    for (Gene2GeneCoexpression g1g : foundGenes.get(foundGene)) {
                        log.debug(" ============ Gene2Gene Id: " + g1g.getId() + " 1st gene: "
                                + g1g.getFirstGene().getOfficialSymbol() + " 2nd gene: "
                                + g1g.getSecondGene().getOfficialSymbol() + " Source Analysis: "
                                + g1g.getSourceAnalysis().getId() + " # of dataSets: " + g1g.getNumDataSets());
                    }
                }
            }
        }
        CoexpressionSummaryValueObject summary = makeSummary(eevos, allTestedDataSets,
                allDatasetsWithSpecificProbes, linksMetPositiveStringency, linksMetNegativeStringency);
        result.getSummary().put(queryGene.getOfficialSymbol(), summary);
        generateDatasetSummary(eevos, result, supportCount, allSupportingDatasets, queryGene);
        /*
         * FIXME I'm lazy and rushed, so I'm using an existing field for this info; probably better to add another
         * field to the value object...
         */
        for (ExpressionExperimentValueObject eevo : eevos) {
            eevo.setExternalUri(AnchorTagUtil.getExpressionExperimentUrl(eevo.getId()));
        }
        Collections.sort(ecvos);
        getGoOverlap(ecvos, queryGene);
        timer.stop();
        if (timer.getTime() > 1000) {
            log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol()
                    + " PhaseIII: " + timer.getTime() + "ms");
        }
        timer.reset();
    } // Over results.
    result.getKnownGeneResults().addAll(ecvos);
    return result;
}
From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java
/**
 * Variant of the canned-analysis query that works from gene IDs via the lightweight gene cache, and can
 * optionally skip the per-dataset detail (bit vectors, tested/specific sets) for speed.
 *
 * @param baseSet expression experiment set the stored analysis was run on
 * @param eeIds experiments to limit results to
 * @param queryGenes genes to query
 * @param stringency minimum number of supporting data sets for a link to be kept
 * @param maxResults cap on results per gene
 * @param queryGenesOnly return links among the query genes only
 * @param skipDetails when true, skip computing dataset vectors and tested/specific experiment sets
 * @return the coexpression links as value objects
 */
private Collection<CoexpressionValueObjectExt> getFilteredCannedAnalysisResults2(ExpressionExperimentSet baseSet,
        Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency, int maxResults,
        boolean queryGenesOnly, boolean skipDetails) {
    if (queryGenes.isEmpty()) {
        throw new IllegalArgumentException("No genes in query");
    }
    List<ExpressionExperimentValueObject> eevos = null;
    List<Long> filteredEeIds = null;
    eevos = getSortedEEvos(eeIds);
    if (eevos.isEmpty()) {
        throw new IllegalArgumentException("There are no usable experiments in the selected set");
    }
    filteredEeIds = (List<Long>) EntityUtils.getIds(eevos);
    /*
     * We get this prior to filtering so it matches the vectors stored with the analysis.
     */
    expressionExperimentSetService.thaw(baseSet);
    List<Long> positionToIDMap = Gene2GenePopulationServiceImpl
            .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments()));
    /*
     * This set of links must be filtered to include those in the data sets being analyzed.
     */
    Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults,
            queryGenesOnly);
    List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>();
    Collection<Long> seenGene2Gene = new HashSet<Long>();
    Collection<Long> queryGeneIds = gg2gs.keySet();
    // return empty collection if no coexpression results
    if (queryGeneIds.isEmpty()) {
        return ecvos;
    }
    Collection<Long> gidsNeeded = new HashSet<Long>();
    // NOTE(review): timerGeneLoad is never started before the first getTime() check below,
    // so that check always sees 0 — the "query genes" load is never logged. Confirm intent.
    StopWatch timerGeneLoad = new StopWatch();
    // Determine which query genes are not yet in the lightweight cache.
    for (Long gid : queryGeneIds) {
        Element e = this.getGeneLightWeightCache().getCache().get(gid);
        if (e == null) {
            gidsNeeded.add(gid);
        }
    }
    if (!gidsNeeded.isEmpty()) {
        Collection<Gene> justLoadedGenes = geneService.loadThawedLiter(gidsNeeded);
        for (Gene g : justLoadedGenes) {
            this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g));
        }
    }
    if (timerGeneLoad.getTime() > 100) {
        log.info("Loading and caching query genes took " + timerGeneLoad.getTime() + "ms");
    }
    timerGeneLoad.reset();
    timerGeneLoad.start();
    gidsNeeded = new HashSet<Long>();
    // load all genes first: collect the "found" partner of every link that is not cached yet.
    for (Long queryGid : queryGeneIds) {
        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid);
        for (Gene2GeneCoexpression g2g : g2gs) {
            Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene()
                    : g2g.getFirstGene();
            if (this.getGeneLightWeightCache().getCache().get(foundGene.getId()) == null) {
                gidsNeeded.add(foundGene.getId());
            }
        }
    }
    // Put all needed genes in Cache to that they are guaranteed to be there
    // for this method call
    if (!gidsNeeded.isEmpty()) {
        Collection<Gene> forCache = geneService.loadThawedLiter(gidsNeeded);
        for (Gene g : forCache) {
            this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g));
        }
    }
    if (timerGeneLoad.getTime() > 100) {
        log.info("Loading and caching found genes took " + timerGeneLoad.getTime() + "ms");
    }
    StopWatch timer = new StopWatch();
    Collection<Long> allUsedGenes = new HashSet<Long>();
    for (Long queryGid : queryGeneIds) {
        timer.reset();
        timer.start();
        Gene qGene = (Gene) this.getGeneLightWeightCache().getCache().get(queryGid).getValue();
        if (!qGene.getTaxon().equals(baseSet.getTaxon())) {
            throw new IllegalArgumentException(
                    "Mismatch between taxon for expression experiment set selected and gene queries");
        }
        allUsedGenes.add(queryGid);
        /*
         * For summary statistics
         */
        CountingMap<Long> supportCount = new CountingMap<Long>();
        Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>();
        Collection<Long> allTestedDataSets = new HashSet<Long>();
        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid);
        assert g2gs != null;
        List<Long> relevantEEIdList = null;
        if (!skipDetails) {
            relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs);
            relevantEEIdList.retainAll(filteredEeIds);
        }
        GeneValueObject queryGeneValueObject = new GeneValueObject(qGene);
        Map<Long, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Long, Collection<Gene2GeneCoexpression>>();
        if (timer.getTime() > 100) {
            log.info("Postprocess " + qGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();
        for (Gene2GeneCoexpression g2g : g2gs) {
            // The "found" gene is whichever side of the link is not the query gene.
            Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene()
                    : g2g.getFirstGene();
            allUsedGenes.add(foundGene.getId());
            // use this flag to test for a duplicate link with opposite
            // stringency support (positive/negative)
            boolean testForDuplicateFlag = false;
            // duplicate found genes can occur when there is both positive
            // and negative support for the same link
            // Keep track of the found genes that we can correctly identify
            // duplicates.
            // All keep the g2g object for debugging purposes.
            if (foundGenes.containsKey(foundGene.getId())) {
                testForDuplicateFlag = true;
            } else {
                foundGenes.put(foundGene.getId(), new ArrayList<Gene2GeneCoexpression>());
            }
            foundGenes.get(foundGene.getId()).add(g2g);
            CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt();
            Long idToGet = foundGene.getId();
            // Replace the (possibly unthawed) link-side entity with the cached lightweight gene.
            foundGene = (Gene) this.getGeneLightWeightCache().getCache().get(idToGet).getValue();
            if (foundGene == null) {
                log.error("Gene id:" + idToGet + " Not in the GeneLightWeightCache, something is wrong");
                continue;
            }
            cvo.setQueryGene(queryGeneValueObject);
            cvo.setFoundGene(new GeneValueObject(foundGene));
            List<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g,
                    positionToIDMap);
            // necessary in case any were filtered out.
            supportingDatasets.retainAll(filteredEeIds);
            cvo.setSupportingExperiments(supportingDatasets);
            List<Long> testingDatasets;
            List<Long> specificDatasets;
            if (!skipDetails) {
                testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g, positionToIDMap);
                testingDatasets.retainAll(filteredEeIds);
                /*
                 * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there
                 * were 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the
                 * analyzer.
                 */
                specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g, positionToIDMap);
                /*
                 * Specific probe EEids contains 1 even if the data set wasn't supporting.
                 */
                specificDatasets.retainAll(supportingDatasets);
                int numTestingDatasets = testingDatasets.size();
                int numSupportingDatasets = supportingDatasets.size();
                /*
                 * SANITY CHECKS
                 */
                assert specificDatasets.size() <= numSupportingDatasets;
                assert numTestingDatasets >= numSupportingDatasets;
                assert numTestingDatasets <= eevos.size();
                cvo.setDatasetVector(getDatasetVector(supportingDatasets, testingDatasets, specificDatasets,
                        relevantEEIdList));
                if (testForDuplicateFlag) {
                    // Second sighting of this link: merge into the existing result instead of adding a new one.
                    testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(),
                            numSupportingDatasets, specificDatasets.size(), numTestingDatasets);
                    continue;
                } else if (numSupportingDatasets < stringency) {// check in case any data sets were filtered
                    // out.(i.e., we're not
                    // interested in the full set of data sets that
                    // were used in the
                    // original analysis.
                    continue;
                }
                allTestedDataSets.addAll(testingDatasets);
                int supportFromSpecificProbes = specificDatasets.size();
                if (g2g.getEffect() < 0) {
                    cvo.setPosSupp(0);
                    cvo.setNegSupp(numSupportingDatasets);
                    if (numSupportingDatasets != supportFromSpecificProbes)
                        cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes);
                } else {
                    cvo.setPosSupp(numSupportingDatasets);
                    if (numSupportingDatasets != supportFromSpecificProbes)
                        cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes);
                    cvo.setNegSupp(0);
                }
                cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));
                cvo.setNumTestedIn(numTestingDatasets);
                for (Long id : supportingDatasets) {
                    supportCount.increment(id);
                }
                allDatasetsWithSpecificProbes.addAll(specificDatasets);
            } else {
                // skipDetails branch: only supporting-dataset counts, no vectors or tested/specific sets.
                int numSupportingDatasets = supportingDatasets.size();
                if (testForDuplicateFlag) {
                    testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(),
                            numSupportingDatasets, null, null);
                    continue;
                } else if (numSupportingDatasets < stringency) {
                    continue;
                }
                if (g2g.getEffect() < 0) {
                    cvo.setPosSupp(0);
                    cvo.setNegSupp(numSupportingDatasets);
                } else {
                    cvo.setPosSupp(numSupportingDatasets);
                    cvo.setNegSupp(0);
                }
                cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));
            }
            cvo.setSortKey();
            /*
             * This check prevents links from being shown twice when we do "among query genes". We don't skip
             * entirely so we get the counts for the summary table populated correctly.
             */
            if (!seenGene2Gene.contains(g2g.getId())) {
                ecvos.add(cvo);
            }
            seenGene2Gene.add(g2g.getId());
        }
        if (timer.getTime() > 300) {
            log.info("Postprocess " + g2gs.size() + " results for " + qGene.getOfficialSymbol() + " Phase II: "
                    + timer.getTime() + "ms");
        }
        timer.stop();
        Collections.sort(ecvos);
    } // Over querygenes
    populateNodeDegree(ecvos, allUsedGenes, filteredEeIds);
    return ecvos;
}
From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java
/** * Retrieve all gene2gene coexpression information for the genes at the specified stringency, using methods that * don't filter by experiment.// w ww . j av a 2s .com * * @param queryGenes * @param stringency * @param maxResults * @param queryGenesOnly * @return */ private Map<Long, Collection<Gene2GeneCoexpression>> getRawCoexpression(Collection<Gene> queryGenes, int stringency, int maxResults, boolean queryGenesOnly) { Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = new HashMap<Long, Collection<Gene2GeneCoexpression>>(); if (queryGenes.size() == 0) { return gg2gs; } StopWatch timer = new StopWatch(); timer.start(); GeneCoexpressionAnalysis gA = findEnabledCoexpressionAnalysis(queryGenes); timer.stop(); if (timer.getTime() > 100) { log.info("Get analysis: " + timer.getTime() + "ms"); } timer.reset(); timer.start(); if (queryGenesOnly) { if (queryGenes.size() < 2) { throw new IllegalArgumentException("Must have at least two genes to do 'my genes only'"); } gg2gs = gene2GeneCoexpressionService.findInterCoexpressionRelationship(queryGenes, stringency, gA); } else { gg2gs = gene2GeneCoexpressionService.findCoexpressionRelationships(queryGenes, stringency, maxResults, gA); } if (timer.getTime() > 1000) { log.info("Get raw coexpression: " + timer.getTime() + "ms"); } return gg2gs; }
From source file:ubic.gemma.analysis.expression.coexpression.links.MatrixRowPairPearsonAnalysis.java
/**
 * Calculate the linear correlation matrix of a matrix, allowing missing values. If there are no missing values,
 * this calls PearsonFast.
 */
@Override
public void calculateMetrics() {
    // No missing data anywhere: delegate to the faster no-NaN implementation.
    if (this.numMissing == 0) {
        calculateMetricsFast();
        return;
    }
    int numused;
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();
    if (numcols < this.minNumUsed) {
        throw new IllegalArgumentException("Sorry, correlations will not be computed unless there are at least "
                + this.minNumUsed + " mutually present data points per vector pair, current data has only "
                + numcols + " columns.");
    }
    boolean docalcs = this.needToCalculateMetrics();
    boolean[][] usedB = new boolean[][] {};
    double[][] data = new double[][] {};
    if (docalcs) {
        // Temporarily copy the data in this matrix, for performance.
        usedB = new boolean[numrows][numcols];
        data = new double[numrows][numcols];
        for (int i = 0; i < numrows; i++) { // first vector
            for (int j = 0; j < numcols; j++) { // second vector
                usedB[i][j] = used.get(i, j);
                // this is only needed if we use it below, speeds things up
                // slightly.
                data[i][j] = this.dataMatrix.get(i, j);
            }
        }
        rowStatistics();
    }
    /* for each vector, compare it to all other vectors */
    ExpressionDataMatrixRowElement itemA = null;
    StopWatch timer = new StopWatch();
    timer.start();
    double[] vectorA = new double[] {};
    // Running sums for the pairwise Pearson computation over mutually-present columns.
    double syy, sxy, sxx, sx, sy, xj, yj;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) { // first vector
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (docalcs) {
            vectorA = data[i];
        }
        boolean thisRowHasMissing = hasMissing[i];
        for (int j = i + 1; j < numrows; j++) { // second vector
            ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;
            // second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
            if (!docalcs || results.get(i, j) != 0.0) {
                keepCorrel(i, j, results.get(i, j), numcols);
                continue;
            }
            double[] vectorB = data[j];
            /* if there are no missing values, use the faster method of calculation */
            if (!thisRowHasMissing && !hasMissing[j]) {
                setCorrel(i, j, correlFast(vectorA, vectorB, i, j), numcols);
                continue;
            }
            /* do it the old fashioned way: accumulate sums over mutually present entries only */
            numused = 0;
            sxy = 0.0;
            sxx = 0.0;
            syy = 0.0;
            sx = 0.0;
            sy = 0.0;
            for (int k = 0; k < numcols; k++) {
                xj = vectorA[k];
                yj = vectorB[k];
                if (usedB[i][k] && usedB[j][k]) { /* this is a bit faster than calling Double.isNan */
                    sx += xj;
                    sy += yj;
                    sxy += xj * yj;
                    sxx += xj * xj;
                    syy += yj * yj;
                    numused++;
                }
            }
            // avoid -1 correlations or extremely noisy values (minNumUsed should be set high enough so that degrees
            // of freedom isn't too low.
            if (numused < this.minNumUsed)
                setCorrel(i, j, Double.NaN, 0);
            else {
                double denom = correlationNorm(numused, sxx, sx, syy, sy);
                if (denom <= 0.0) { // means variance is zero for one of the vectors.
                    setCorrel(i, j, 0.0, numused);
                } else {
                    double correl = (sxy - sx * sy / numused) / Math.sqrt(denom);
                    setCorrel(i, j, correl, numused);
                }
            }
            ++numComputed;
        }
        // Periodic progress logging; timer is reset so elapsed time is per-interval.
        if ((i + 1) % 2000 == 0) {
            double t = timer.getTime() / 1000.0;
            log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA
                    + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : "")
                    + String.format(", time elapsed since last check: %.2f", t) + "s");
            timer.reset();
            timer.start();
        }
    }
    log.info(skipped + " rows skipped, where probe lacks a gene annotation");
    finishMetrics();
}
From source file:ubic.gemma.analysis.expression.coexpression.links.MatrixRowPairPearsonAnalysis.java
/**
 * Calculate a linear correlation matrix for a matrix. Use this if you know there are no missing values, or don't
 * care about NaNs. Rows that are not mapped to genes are skipped.
 */
private void calculateMetricsFast() {
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();
    boolean docalcs = this.needToCalculateMetrics();
    double[][] data = new double[][] {};
    if (docalcs) {
        rowStatistics();
        // Temporarily put the data in this matrix (performance)
        data = new double[numrows][numcols];
        for (int i = 0; i < numrows; i++) { // first vector
            for (int j = 0; j < numcols; j++) { // second vector
                data[i][j] = this.dataMatrix.get(i, j);
            }
        }
    }
    /*
     * For each vector, compare it to all other vectors, avoid repeating things; skip items that don't have genes
     * mapped to them.
     */
    StopWatch timer = new StopWatch();
    timer.start();
    ExpressionDataMatrixRowElement itemA = null;
    ExpressionDataMatrixRowElement itemB = null;
    double[] vectorA = null;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) {
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (docalcs) {
            vectorA = data[i];
        }
        // Upper triangle only: each unordered pair is computed once.
        for (int j = i + 1; j < numrows; j++) {
            itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;
            if (!docalcs || results.get(i, j) != 0.0) {
                // second pass over matrix. Don't calculate it
                // if we
                // already have it. Just do the requisite checks.
                keepCorrel(i, j, results.get(i, j), numcols);
                continue;
            }
            double[] vectorB = data[j];
            setCorrel(i, j, correlFast(vectorA, vectorB, i, j), numcols);
            ++numComputed;
        }
        // Periodic progress logging; timer is reset so elapsed time is per-interval.
        if ((i + 1) % 2000 == 0) {
            double t = timer.getTime() / 1000.0;
            log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA
                    + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : "")
                    + String.format(", time elapsed since last check: %.2f", t) + "s");
            timer.reset();
            timer.start();
        }
    }
    log.info(skipped + " rows skipped, due to no gene association");
    finishMetrics();
}
From source file:ubic.gemma.analysis.expression.diff.GeneDifferentialExpressionServiceImpl.java
@Override public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene, double threshold, Integer limit) {// w ww . j av a2 s .c o m StopWatch timer = new StopWatch(); timer.start(); Collection<DifferentialExpressionValueObject> devos = new ArrayList<DifferentialExpressionValueObject>(); if (gene == null) return devos; if (timer.getTime() > 1000) { log.info("Find experiments: " + timer.getTime() + " ms"); } timer.reset(); timer.start(); Map<BioAssaySet, List<DifferentialExpressionAnalysisResult>> results = differentialExpressionResultService .find(gene, threshold, limit); timer.stop(); if (timer.getTime() > 1000) { log.info("Diff raw ex results: " + timer.getTime() + " ms"); } return postProcessDiffExResults(gene, threshold, results); }
From source file:ubic.gemma.analysis.expression.diff.LinearModelAnalyzer.java
/** * Linear models solved/* w w w. j a va 2 s. co m*/ * * @param designMatrix * @param data * @param rawResults * @param quantitationType * @return */ private Future<?> runAnalysisFuture(final DesignMatrix designMatrix, final DoubleMatrix<String, String> data, final Map<String, LinearModelSummary> rawResults, final QuantitationType quantitationType) { ExecutorService service = Executors.newSingleThreadExecutor(); Future<?> f = service.submit(new Runnable() { @Override public void run() { StopWatch timer = new StopWatch(); timer.start(); LeastSquaresFit fit = null; if (quantitationType.getScale().equals(ScaleType.COUNT)) { log.info(" Performing weighted least squares regression on COUNT data "); MeanVarianceEstimator mv = new MeanVarianceEstimator(designMatrix, data); fit = new LeastSquaresFit(designMatrix, data, mv.getWeights()); } else { fit = new LeastSquaresFit(designMatrix, data); } log.info("Model fit data matrix " + data.rows() + " x " + data.columns() + ": " + timer.getTime() + "ms"); timer.reset(); timer.start(); Map<String, LinearModelSummary> res = fit.summarizeByKeys(true); log.info("Model summarize/ANOVA: " + timer.getTime() + "ms"); rawResults.putAll(res); log.info("Analysis phase done ..."); } }); service.shutdown(); return f; }