Example usage for org.apache.commons.lang.time StopWatch reset

List of usage examples for org.apache.commons.lang.time StopWatch reset

Introduction

In this page you can find the example usage for org.apache.commons.lang.time StopWatch reset.

Prototype

public void reset() 

Source Link

Document

Resets the stopwatch.

Usage

From source file:ubic.BAMSandAllen.optimize.GreedyRemover.java

/**
 * Iteratively remove rows from the B data matrix of the matrix pair, each time increasing the correlation the
 * maximum possible/* w  ww  . j  a va 2 s  . c  om*/
 * 
 * @param iterations number of iterations to perform
 * @param slow indicates whether to re-compute regressions for every gene removal test
 * @throws Exception
 */
public void run(int iterations, boolean slow) throws Exception {
    long startTime = System.currentTimeMillis();
    StopWatch watch = new StopWatch();
    watch.start();
    StopWatch smallWatch = new StopWatch();
    int randomChoices = 0;

    for (int i = 0; i < iterations; i++) {
        smallWatch.reset();
        smallWatch.start();
        // find the row that increases the correlation the most

        double baseCorrelation = pair.getCorrelation(true);
        log.info("Base correlation:" + baseCorrelation + " size:" + pair.getMatrixBDataRows().size());
        // pair.printDimensions();

        double bestIncrease = Double.MAX_VALUE * -1;
        String bestRow = null;
        int count = 0;
        if (!slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) {
            ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false);
        }
        List<String> rows = pair.getMatrixBDataRows();
        for (String rowName : pair.getMatrixBDataRows()) {
            // if ( ++count % 5000 == 0 ) log.info( "Data row:" + count + " time:" + watch.getTime() );

            double withoutCorrelation = pair.correWithoutMatrixBDataRow(rowName);
            double diff = withoutCorrelation - baseCorrelation;
            if (baseCorrelation < 0) {
                diff *= -1;
            }
            if (diff > bestIncrease) {
                bestRow = rowName;
                bestIncrease = diff;
            }
        }

        if (!slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) {
            ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(true);
        }

        // now remove it
        // pair.removeDataRow( bestRow ); //old way
        if (bestRow == null) {
            bestRow = pair.getMatrixBDataRows().get((int) Math.random() * rows.size());
            log.info("No best row found " + pair.getCorrelation(true));
            log.info("Removing any gene results in correlation reduction " + pair.getCorrelation(true));
            break;
            // log.info( "Using random row:" + bestRow );
            // randomChoices++;
        }
        pair.removeMatrixBDataRowFast(bestRow);

        FileTools.stringToFile(bestRow + "\n",
                new File(SetupParameters.getDataFolder() + "LOOGenesInOrder." + startTime + ".txt"), true);
        if (i % 1000 == 0) {
            // write correlation
            FileTools.stringToFile(i + "," + baseCorrelation + "\n",
                    new File(SetupParameters.getDataFolder() + "LOOResults." + startTime + ".txt"), true);
        }
        log.info(bestRow + " changes correlation by " + bestIncrease + " time:" + (smallWatch.getTime() / 1000)
                + "s total:" + (watch.getTime() / 1000) + "s");

    }
    log.info("Random choices: " + randomChoices);
    log.info("Start time:" + startTime);
}

From source file:ubic.BAMSandAllen.optimize.GreedyThreadRunner.java

public void run() {
    StopWatch watch = new StopWatch();
    watch.start();//  w  w  w  .jav  a2 s  .c om
    StopWatch smallWatch = new StopWatch();

    smallWatch.reset();
    smallWatch.start();
    // find the row that increases the correlation the most

    double baseCorrelation = baseLine;
    // log.info( "Base correlation:" + baseCorrelation + " size:" + pair.getMatrixBDataRows().size() );
    // pair.printDimensions();

    bestIncrease = Double.MAX_VALUE * -1;
    bestRow = null;
    int count = 0;

    if (pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) {
        ConnectivityAndAllenPartialExpressionMatrixPair partialPair = ((ConnectivityAndAllenPartialExpressionMatrixPair) pair);
        // only do it if we are regressing on expression, ugly
        if (!slow
                && partialPair
                        .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.BOTH
                && partialPair
                        .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.EXPRESSION) {
            ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false);
        }
    }

    // if ( !slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair ) {
    // ( ( ConnectivityAndAllenPartialExpressionMatrixPair ) pair ).setRegressionComputeMatrixB( false );
    // }

    for (String rowName : rowsToTest) {
        // if ( ++count % 5000 == 0 ) log.info( "Data row:" + count + " time:" + watch.getTime() );

        double withoutCorrelation = pair.correWithoutMatrixBDataRow(rowName);
        double diff = withoutCorrelation - baseCorrelation;
        if (!increase) {
            diff *= -1;
        }

        // TEMP for ranking once
        // try {
        // FileTools.stringToFile( rowName + "\t" + diff + "\n", new File( SetupParameters.getDataFolder()
        // + "firstrun.txt" ), true );
        // } catch ( Exception e ) {
        // e.printStackTrace();
        // }
        // TEMP

        if (diff > bestIncrease) {
            bestRow = rowName;
            bestIncrease = diff;
        }
    }

    // if ( !slow && pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair ) {
    // ( ( ConnectivityAndAllenPartialExpressionMatrixPair ) pair ).setRegressionComputeMatrixB( true );
    // }

    if (pair instanceof ConnectivityAndAllenPartialExpressionMatrixPair) {
        ConnectivityAndAllenPartialExpressionMatrixPair partialPair = ((ConnectivityAndAllenPartialExpressionMatrixPair) pair);
        // only do it if we are regressing on expression, ugly
        if (!slow && (partialPair
                .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.BOTH
                || partialPair
                        .getRegressType() == ConnectivityAndAllenPartialExpressionMatrixPair.RegressMatrix.EXPRESSION)) {
            ((ConnectivityAndAllenPartialExpressionMatrixPair) pair).setRegressionComputeMatrixB(false);
        }
    }

    // log.info( bestRow + " changes correlation by " + bestIncrease + " time:" + ( smallWatch.getTime() / 1000 )
    // + "s total:" + ( watch.getTime() / 1000 ) + "s" );

}

From source file:ubic.gemma.analysis.expression.coexpression.Gene2GenePopulationServiceImpl.java

/**
 * @param expressionExperiments/*w  ww .java2 s. c o m*/
 * @param toUseGenes
 * @param stringency
 * @param analysis if null, no results will be saved to the database, and will be printed to stdout instead.
 * @return genes that were processed.
 */
private Collection<Long> doAnalysis(Collection<? extends BioAssaySet> expressionExperiments,
        Collection<Gene> toUseGenes, int stringency, GeneCoexpressionAnalysis analysis) {
    int totalLinks = 0;
    Collection<Long> processedGenes = new HashSet<Long>();
    Map<Long, Integer> eeIdOrder = ProbeLinkCoexpressionAnalyzerImpl.getOrderingMap(expressionExperiments);
    Map<Long, Gene> genesToAnalyzeMap = EntityUtils.getIdMap(toUseGenes);

    try {
        StopWatch timer = new StopWatch();
        timer.start();
        for (Gene queryGene : genesToAnalyzeMap.values()) {

            log.info("Starting: " + queryGene);
            totalLinks += processGene(expressionExperiments, genesToAnalyzeMap, analysis, eeIdOrder,
                    processedGenes, stringency, queryGene);
            if (timer.getTime() > 10 * 60 * 1000) {
                timer.reset();
                timer.start();
                log.info("Processed " + processedGenes.size() + "/" + toUseGenes.size() + " genes so far, "
                        + totalLinks + " links in total");
            }
        }

        completeNodeDegreeComputations(analysis != null);

        if (analysis != null) {
            // All done...
            analysis.setDescription(analysis.getDescription() + "; " + totalLinks + " gene pairs stored.");
            analysis.setEnabled(true);
            geneCoexpressionAnalysisService.update(analysis);
            securityService.makePublic(analysis);
        }

        log.info(totalLinks + " gene pairs processed.");

    } catch (Exception e) {
        log.error("There was an error during analysis!");

        if (analysis != null) {
            log.error("Cleaning up if possible ...");
            geneCoexpressionAnalysisService.delete(analysis);
        }
        throw new RuntimeException(e);
    }
    return processedGenes;
}

From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java

/**
 * Get coexpression results using a pure gene2gene query (without visiting the probe2probe tables. This is generally
 * faster, probably even if we're only interested in data from a subset of the experiments.
 * /*from ww  w.  j a v  a  2  s.  c om*/
 * @param baseSet
 * @param eeIds Experiments to limit the results to (must not be null, and should already be security-filtered)
 * @param queryGenes
 * @param stringency
 * @param maxResults
 * @param queryGenesOnly return links among the query genes only.
 * @return
 */
private CoexpressionMetaValueObject getFilteredCannedAnalysisResults(ExpressionExperimentSet baseSet,
        Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency, int maxResults,
        boolean queryGenesOnly) {

    if (queryGenes.isEmpty()) {
        throw new IllegalArgumentException("No genes in query");
    }

    List<ExpressionExperimentValueObject> eevos = getSortedEEvos(eeIds);

    if (eevos.isEmpty()) {
        throw new IllegalArgumentException("There are no usable experiments in the selected set");
    }

    /*
     * We get this prior to filtering so it matches the vectors stored with the analysis.
     */
    expressionExperimentSetService.thaw(baseSet);
    List<Long> positionToIDMap = Gene2GenePopulationServiceImpl
            .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments()));

    /*
     * This set of links must be filtered to include those in the data sets being analyzed.
     */
    Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults,
            queryGenesOnly);

    List<Long> filteredEeIds = (List<Long>) EntityUtils.getIds(eevos);

    CoexpressionMetaValueObject result = initValueObject(queryGenes, eevos, true);

    List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>();

    Collection<Gene2GeneCoexpression> seen = new HashSet<Gene2GeneCoexpression>();

    // queryGenes = geneService.thawLite( gg2gs.keySet() );

    // populate the value objects.
    StopWatch timer = new StopWatch();
    Collection<Gene> allUsedGenes = new HashSet<Gene>();
    for (Gene queryGene : queryGenes) {
        timer.start();

        if (!queryGene.getTaxon().equals(baseSet.getTaxon())) {
            throw new IllegalArgumentException(
                    "Mismatch between taxon for expression experiment set selected and gene queries");
        }

        allUsedGenes.add(queryGene);

        /*
         * For summary statistics
         */
        CountingMap<Long> supportCount = new CountingMap<Long>();
        Collection<Long> allSupportingDatasets = new HashSet<Long>();
        Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>();
        Collection<Long> allTestedDataSets = new HashSet<Long>();

        int linksMetPositiveStringency = 0;
        int linksMetNegativeStringency = 0;

        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGene.getId());

        assert g2gs != null;

        List<Long> relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs);
        relevantEEIdList.retainAll(filteredEeIds);

        GeneValueObject queryGeneValueObject = new GeneValueObject(queryGene);

        HashMap<Gene, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Gene, Collection<Gene2GeneCoexpression>>();

        // for queryGene get the interactions
        Map<Long, Gene2GeneProteinAssociation> proteinInteractionMap = this
                .getGene2GeneProteinAssociationForQueryGene(queryGene);

        Map<Long, TfGeneAssociation> regulatedBy = this.getTfGeneAssociationsforTargetGene(queryGene);
        Map<Long, TfGeneAssociation> regulates = this.getTfGeneAssociationsforTf(queryGene);

        if (timer.getTime() > 100) {
            log.info("Postprocess " + queryGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();

        for (Gene2GeneCoexpression g2g : g2gs) {
            StopWatch timer2 = new StopWatch();
            timer2.start();

            Gene foundGene = g2g.getFirstGene().equals(queryGene) ? g2g.getSecondGene() : g2g.getFirstGene();

            allUsedGenes.add(foundGene);

            // FIXME Symptom fix for duplicate found genes
            // Keep track of the found genes that we can correctly identify
            // duplicates.
            // All keep the g2g object for debugging purposes.
            if (foundGenes.containsKey(foundGene)) {
                foundGenes.get(foundGene).add(g2g);
                log.warn("Duplicate gene found in coexpression results, skipping: " + foundGene
                        + " From analysis: " + g2g.getSourceAnalysis().getId());
                continue; // Found a duplicate gene, don't add to results
                          // just our debugging list

            }

            foundGenes.put(foundGene, new ArrayList<Gene2GeneCoexpression>());
            foundGenes.get(foundGene).add(g2g);

            CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt();

            /*
             * This Thaw is a big time sink and _should not_ be necessary.
             */
            // foundGene = geneService.thawLite( foundGene ); // db hit

            cvo.setQueryGene(queryGeneValueObject);
            cvo.setFoundGene(new GeneValueObject(foundGene));

            if (timer2.getTime() > 10)
                log.info("Coexp. Gene processing phase I:" + timer2.getTime() + "ms");
            timer2.stop();
            timer2.reset();
            timer2.start();

            populateInteractions(proteinInteractionMap, regulatedBy, regulates, foundGene, cvo);

            Collection<Long> testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g,
                    positionToIDMap);
            testingDatasets.retainAll(filteredEeIds);

            /*
             * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there were
             * 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the analyzer.
             */
            Collection<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g,
                    positionToIDMap);

            // necessary in case any were filtered out.
            supportingDatasets.retainAll(filteredEeIds);

            cvo.setSupportingExperiments(supportingDatasets);

            Collection<Long> specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g,
                    positionToIDMap);

            /*
             * Specific probe EEids contains 1 even if the data set wasn't supporting.
             */
            specificDatasets.retainAll(supportingDatasets);

            int numTestingDatasets = testingDatasets.size();
            int numSupportingDatasets = supportingDatasets.size();

            /*
             * SANITY CHECKS
             */
            assert specificDatasets.size() <= numSupportingDatasets;
            assert numTestingDatasets >= numSupportingDatasets;
            assert numTestingDatasets <= eevos.size();

            cvo.setDatasetVector(
                    getDatasetVector(supportingDatasets, testingDatasets, specificDatasets, relevantEEIdList));

            /*
             * This check is necessary in case any data sets were filtered out. (i.e., we're not interested in the
             * full set of data sets that were used in the original analysis.
             */
            if (numSupportingDatasets < stringency) {
                continue;
            }

            allTestedDataSets.addAll(testingDatasets);

            int supportFromSpecificProbes = specificDatasets.size();
            if (g2g.getEffect() < 0) {
                cvo.setPosSupp(0);
                cvo.setNegSupp(numSupportingDatasets);
                if (numSupportingDatasets != supportFromSpecificProbes)
                    cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes);

                ++linksMetNegativeStringency;
            } else {
                cvo.setPosSupp(numSupportingDatasets);
                if (numSupportingDatasets != supportFromSpecificProbes)
                    cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes);
                cvo.setNegSupp(0);
                ++linksMetPositiveStringency;
            }
            cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));
            cvo.setNumTestedIn(numTestingDatasets);

            for (Long id : supportingDatasets) {
                supportCount.increment(id);
            }

            cvo.setSortKey();

            /*
             * This check prevents links from being shown twice when we do "among query genes". We don't skip
             * entirely so we get the counts for the summary table populated correctly.
             */
            if (!seen.contains(g2g)) {
                ecvos.add(cvo);
            }

            seen.add(g2g);

            allSupportingDatasets.addAll(supportingDatasets);
            allDatasetsWithSpecificProbes.addAll(specificDatasets);

        }

        Collection<Long> geneIds = new ArrayList<Long>();

        for (Gene g : allUsedGenes) {

            geneIds.add(g.getId());

        }

        populateNodeDegree(ecvos, geneIds, allTestedDataSets);

        if (timer.getTime() > 1000) {
            log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol()
                    + "Phase II: " + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();

        // This is only necessary for debugging purposes. Helps us keep
        // track of duplicate genes found above.
        if (log.isDebugEnabled()) {
            for (Gene foundGene : foundGenes.keySet()) {
                if (foundGenes.get(foundGene).size() > 1) {
                    log.debug("** DUPLICATE: " + foundGene.getOfficialSymbol()
                            + " found multiple times. Gene2Genes objects are: ");
                    for (Gene2GeneCoexpression g1g : foundGenes.get(foundGene)) {
                        log.debug(" ============ Gene2Gene Id: " + g1g.getId() + " 1st gene: "
                                + g1g.getFirstGene().getOfficialSymbol() + " 2nd gene: "
                                + g1g.getSecondGene().getOfficialSymbol() + " Source Analysis: "
                                + g1g.getSourceAnalysis().getId() + " # of dataSets: " + g1g.getNumDataSets());
                    }
                }
            }
        }

        CoexpressionSummaryValueObject summary = makeSummary(eevos, allTestedDataSets,
                allDatasetsWithSpecificProbes, linksMetPositiveStringency, linksMetNegativeStringency);
        result.getSummary().put(queryGene.getOfficialSymbol(), summary);

        generateDatasetSummary(eevos, result, supportCount, allSupportingDatasets, queryGene);

        /*
         * FIXME I'm lazy and rushed, so I'm using an existing field for this info; probably better to add another
         * field to the value object...
         */
        for (ExpressionExperimentValueObject eevo : eevos) {
            eevo.setExternalUri(AnchorTagUtil.getExpressionExperimentUrl(eevo.getId()));
        }

        Collections.sort(ecvos);
        getGoOverlap(ecvos, queryGene);

        timer.stop();
        if (timer.getTime() > 1000) {
            log.info("Postprocess " + g2gs.size() + " results for " + queryGene.getOfficialSymbol()
                    + " PhaseIII: " + timer.getTime() + "ms");
        }
        timer.reset();
    } // Over results.

    result.getKnownGeneResults().addAll(ecvos);
    return result;
}

From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java

/**
 * @param baseSet/*from  ww w  .  j  a  va 2 s  .  co  m*/
 * @param eeIds
 * @param queryGenes
 * @param stringency
 * @param maxResults
 * @param queryGenesOnly
 * @return
 */
private Collection<CoexpressionValueObjectExt> getFilteredCannedAnalysisResults2(
        ExpressionExperimentSet baseSet, Collection<Long> eeIds, Collection<Gene> queryGenes, int stringency,
        int maxResults, boolean queryGenesOnly, boolean skipDetails) {

    if (queryGenes.isEmpty()) {
        throw new IllegalArgumentException("No genes in query");
    }

    List<ExpressionExperimentValueObject> eevos = null;
    List<Long> filteredEeIds = null;

    eevos = getSortedEEvos(eeIds);

    if (eevos.isEmpty()) {
        throw new IllegalArgumentException("There are no usable experiments in the selected set");
    }

    filteredEeIds = (List<Long>) EntityUtils.getIds(eevos);
    /*
     * We get this prior to filtering so it matches the vectors stored with the analysis.
     */
    expressionExperimentSetService.thaw(baseSet);
    List<Long> positionToIDMap = Gene2GenePopulationServiceImpl
            .getPositionToIdMap(EntityUtils.getIds(baseSet.getExperiments()));

    /*
     * This set of links must be filtered to include those in the data sets being analyzed.
     */
    Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = getRawCoexpression(queryGenes, stringency, maxResults,
            queryGenesOnly);

    List<CoexpressionValueObjectExt> ecvos = new ArrayList<CoexpressionValueObjectExt>();

    Collection<Long> seenGene2Gene = new HashSet<Long>();

    Collection<Long> queryGeneIds = gg2gs.keySet();

    // return empty collection if no coexpression results
    if (queryGeneIds.isEmpty()) {
        return ecvos;
    }

    Collection<Long> gidsNeeded = new HashSet<Long>();

    StopWatch timerGeneLoad = new StopWatch();

    for (Long gid : queryGeneIds) {

        Element e = this.getGeneLightWeightCache().getCache().get(gid);

        if (e == null) {
            gidsNeeded.add(gid);
        }

    }

    if (!gidsNeeded.isEmpty()) {

        Collection<Gene> justLoadedGenes = geneService.loadThawedLiter(gidsNeeded);

        for (Gene g : justLoadedGenes) {

            this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g));
        }
    }

    if (timerGeneLoad.getTime() > 100) {
        log.info("Loading and caching query genes took " + timerGeneLoad.getTime() + "ms");
    }

    timerGeneLoad.reset();
    timerGeneLoad.start();

    gidsNeeded = new HashSet<Long>();

    // load all genes first
    for (Long queryGid : queryGeneIds) {

        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid);

        for (Gene2GeneCoexpression g2g : g2gs) {

            Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene()
                    : g2g.getFirstGene();

            if (this.getGeneLightWeightCache().getCache().get(foundGene.getId()) == null) {
                gidsNeeded.add(foundGene.getId());
            }

        }

    }

    // Put all needed genes in Cache to that they are guaranteed to be there
    // for this method call
    if (!gidsNeeded.isEmpty()) {

        Collection<Gene> forCache = geneService.loadThawedLiter(gidsNeeded);

        for (Gene g : forCache) {
            this.getGeneLightWeightCache().getCache().put(new Element(g.getId(), g));
        }

    }
    if (timerGeneLoad.getTime() > 100) {
        log.info("Loading and caching found genes took " + timerGeneLoad.getTime() + "ms");
    }

    StopWatch timer = new StopWatch();
    Collection<Long> allUsedGenes = new HashSet<Long>();

    for (Long queryGid : queryGeneIds) {
        timer.reset();
        timer.start();

        Gene qGene = (Gene) this.getGeneLightWeightCache().getCache().get(queryGid).getValue();

        if (!qGene.getTaxon().equals(baseSet.getTaxon())) {
            throw new IllegalArgumentException(
                    "Mismatch between taxon for expression experiment set selected and gene queries");
        }

        allUsedGenes.add(queryGid);

        /*
         * For summary statistics
         */
        CountingMap<Long> supportCount = new CountingMap<Long>();

        Collection<Long> allDatasetsWithSpecificProbes = new HashSet<Long>();
        Collection<Long> allTestedDataSets = new HashSet<Long>();

        Collection<Gene2GeneCoexpression> g2gs = gg2gs.get(queryGid);

        assert g2gs != null;

        List<Long> relevantEEIdList = null;

        if (!skipDetails) {
            relevantEEIdList = getRelevantEEidsForBitVector(positionToIDMap, g2gs);
            relevantEEIdList.retainAll(filteredEeIds);
        }
        GeneValueObject queryGeneValueObject = new GeneValueObject(qGene);

        Map<Long, Collection<Gene2GeneCoexpression>> foundGenes = new HashMap<Long, Collection<Gene2GeneCoexpression>>();

        if (timer.getTime() > 100) {
            log.info("Postprocess " + qGene.getOfficialSymbol() + " Phase I: " + timer.getTime() + "ms");
        }
        timer.stop();
        timer.reset();
        timer.start();

        for (Gene2GeneCoexpression g2g : g2gs) {

            Gene foundGene = g2g.getFirstGene().getId().equals(queryGid) ? g2g.getSecondGene()
                    : g2g.getFirstGene();

            allUsedGenes.add(foundGene.getId());

            // use this flag to test for a duplicate link with opposite
            // stringency support (positive/negative)
            boolean testForDuplicateFlag = false;

            // duplicate found genes can occur when there is both positive
            // and negative support for the same link
            // Keep track of the found genes that we can correctly identify
            // duplicates.
            // All keep the g2g object for debugging purposes.
            if (foundGenes.containsKey(foundGene.getId())) {

                testForDuplicateFlag = true;

            } else {
                foundGenes.put(foundGene.getId(), new ArrayList<Gene2GeneCoexpression>());
            }

            foundGenes.get(foundGene.getId()).add(g2g);

            CoexpressionValueObjectExt cvo = new CoexpressionValueObjectExt();

            Long idToGet = foundGene.getId();

            foundGene = (Gene) this.getGeneLightWeightCache().getCache().get(idToGet).getValue();

            if (foundGene == null) {
                log.error("Gene id:" + idToGet + " Not in the GeneLightWeightCache, something is wrong");
                continue;
            }

            cvo.setQueryGene(queryGeneValueObject);
            cvo.setFoundGene(new GeneValueObject(foundGene));

            List<Long> supportingDatasets = Gene2GenePopulationServiceImpl.getSupportingExperimentIds(g2g,
                    positionToIDMap);

            // necessary in case any were filtered out.
            supportingDatasets.retainAll(filteredEeIds);
            cvo.setSupportingExperiments(supportingDatasets);

            List<Long> testingDatasets;
            List<Long> specificDatasets;
            if (!skipDetails) {

                testingDatasets = Gene2GenePopulationServiceImpl.getTestedExperimentIds(g2g, positionToIDMap);
                testingDatasets.retainAll(filteredEeIds);

                /*
                 * necesssary in case any were filtered out (for example, if this is a virtual analysis; or there
                 * were 'troubled' ees. Note that 'supporting' includes 'non-specific' if they were recorded by the
                 * analyzer.
                 */

                specificDatasets = Gene2GenePopulationServiceImpl.getSpecificExperimentIds(g2g,
                        positionToIDMap);

                /*
                 * Specific probe EEids contains 1 even if the data set wasn't supporting.
                 */
                specificDatasets.retainAll(supportingDatasets);

                int numTestingDatasets = testingDatasets.size();
                int numSupportingDatasets = supportingDatasets.size();

                /*
                 * SANITY CHECKS
                 */
                assert specificDatasets.size() <= numSupportingDatasets;
                assert numTestingDatasets >= numSupportingDatasets;
                assert numTestingDatasets <= eevos.size();

                cvo.setDatasetVector(getDatasetVector(supportingDatasets, testingDatasets, specificDatasets,
                        relevantEEIdList));

                if (testForDuplicateFlag) {

                    testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(),
                            numSupportingDatasets, specificDatasets.size(), numTestingDatasets);

                    continue;

                } else if (numSupportingDatasets < stringency) {// check in case any data sets were filtered
                                                                // out.(i.e., we're not
                                                                // interested in the full set of data sets that
                                                                // were used in the
                                                                // original analysis.
                    continue;
                }

                allTestedDataSets.addAll(testingDatasets);

                int supportFromSpecificProbes = specificDatasets.size();

                if (g2g.getEffect() < 0) {
                    cvo.setPosSupp(0);
                    cvo.setNegSupp(numSupportingDatasets);
                    if (numSupportingDatasets != supportFromSpecificProbes)
                        cvo.setNonSpecNegSupp(numSupportingDatasets - supportFromSpecificProbes);

                } else {
                    cvo.setPosSupp(numSupportingDatasets);
                    if (numSupportingDatasets != supportFromSpecificProbes)
                        cvo.setNonSpecPosSupp(numSupportingDatasets - supportFromSpecificProbes);
                    cvo.setNegSupp(0);
                }
                cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));
                cvo.setNumTestedIn(numTestingDatasets);

                for (Long id : supportingDatasets) {
                    supportCount.increment(id);
                }

                allDatasetsWithSpecificProbes.addAll(specificDatasets);

            } else {

                int numSupportingDatasets = supportingDatasets.size();

                if (testForDuplicateFlag) {

                    testAndModifyDuplicateResultForOppositeStringency(ecvos, qGene, foundGene, g2g.getEffect(),
                            numSupportingDatasets, null, null);

                    continue;

                } else if (numSupportingDatasets < stringency) {
                    continue;
                }

                if (g2g.getEffect() < 0) {
                    cvo.setPosSupp(0);
                    cvo.setNegSupp(numSupportingDatasets);
                } else {
                    cvo.setPosSupp(numSupportingDatasets);
                    cvo.setNegSupp(0);
                }

                cvo.setSupportKey(Math.max(cvo.getPosSupp(), cvo.getNegSupp()));

            }

            cvo.setSortKey();

            /*
             * This check prevents links from being shown twice when we do "among query genes". We don't skip
             * entirely so we get the counts for the summary table populated correctly.
             */
            if (!seenGene2Gene.contains(g2g.getId())) {
                ecvos.add(cvo);
            }

            seenGene2Gene.add(g2g.getId());

        }

        if (timer.getTime() > 300) {
            log.info("Postprocess " + g2gs.size() + " results for " + qGene.getOfficialSymbol() + " Phase II: "
                    + timer.getTime() + "ms");
        }
        timer.stop();

        Collections.sort(ecvos);

    } // Over querygenes

    populateNodeDegree(ecvos, allUsedGenes, filteredEeIds);
    return ecvos;

}

From source file:ubic.gemma.analysis.expression.coexpression.GeneCoexpressionServiceImpl.java

/**
 * Retrieve all gene2gene coexpression information for the genes at the specified stringency, using methods that
 * don't filter by experiment.//  w ww . j av  a  2s .com
 * 
 * @param queryGenes
 * @param stringency
 * @param maxResults
 * @param queryGenesOnly
 * @return
 */
private Map<Long, Collection<Gene2GeneCoexpression>> getRawCoexpression(Collection<Gene> queryGenes,
        int stringency, int maxResults, boolean queryGenesOnly) {
    Map<Long, Collection<Gene2GeneCoexpression>> gg2gs = new HashMap<Long, Collection<Gene2GeneCoexpression>>();

    if (queryGenes.size() == 0) {
        return gg2gs;
    }

    StopWatch timer = new StopWatch();
    timer.start();
    GeneCoexpressionAnalysis gA = findEnabledCoexpressionAnalysis(queryGenes);
    timer.stop();
    if (timer.getTime() > 100) {
        log.info("Get analysis: " + timer.getTime() + "ms");
    }
    timer.reset();
    timer.start();

    if (queryGenesOnly) {
        if (queryGenes.size() < 2) {
            throw new IllegalArgumentException("Must have at least two genes to do 'my genes only'");
        }
        gg2gs = gene2GeneCoexpressionService.findInterCoexpressionRelationship(queryGenes, stringency, gA);
    } else {
        gg2gs = gene2GeneCoexpressionService.findCoexpressionRelationships(queryGenes, stringency, maxResults,
                gA);
    }

    if (timer.getTime() > 1000) {
        log.info("Get raw coexpression: " + timer.getTime() + "ms");
    }

    return gg2gs;
}

From source file:ubic.gemma.analysis.expression.coexpression.links.MatrixRowPairPearsonAnalysis.java

/**
 * Calculate the linear correlation matrix of a matrix, allowing missing values. If there are no missing values,
 * this calls PearsonFast.//from   w  w w.ja va2s  .  com
 */
@Override
public void calculateMetrics() {

    if (this.numMissing == 0) {
        calculateMetricsFast();
        return;
    }

    int numused;
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();

    if (numcols < this.minNumUsed) {
        throw new IllegalArgumentException("Sorry, correlations will not be computed unless there are at least "
                + this.minNumUsed + " mutually present data points per vector pair, current data has only "
                + numcols + " columns.");
    }

    boolean docalcs = this.needToCalculateMetrics();
    boolean[][] usedB = new boolean[][] {};
    double[][] data = new double[][] {};
    if (docalcs) {
        // Temporarily copy the data in this matrix, for performance.
        usedB = new boolean[numrows][numcols];
        data = new double[numrows][numcols];
        for (int i = 0; i < numrows; i++) { // first vector
            for (int j = 0; j < numcols; j++) { // second vector
                usedB[i][j] = used.get(i, j); // this is only needed if we use it below, speeds things up
                // slightly.
                data[i][j] = this.dataMatrix.get(i, j);
            }
        }

        rowStatistics();
    }

    /* for each vector, compare it to all other vectors */
    ExpressionDataMatrixRowElement itemA = null;
    StopWatch timer = new StopWatch();
    timer.start();
    double[] vectorA = new double[] {};
    double syy, sxy, sxx, sx, sy, xj, yj;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) { // first vector
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (docalcs) {
            vectorA = data[i];
        }

        boolean thisRowHasMissing = hasMissing[i];

        for (int j = i + 1; j < numrows; j++) { // second vector
            ExpressionDataMatrixRowElement itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;

            // second pass over matrix? Don't calculate it if we already have it. Just do the requisite checks.
            if (!docalcs || results.get(i, j) != 0.0) {
                keepCorrel(i, j, results.get(i, j), numcols);
                continue;
            }

            double[] vectorB = data[j];

            /* if there are no missing values, use the faster method of calculation */
            if (!thisRowHasMissing && !hasMissing[j]) {
                setCorrel(i, j, correlFast(vectorA, vectorB, i, j), numcols);
                continue;
            }

            /* do it the old fashioned way */
            numused = 0;
            sxy = 0.0;
            sxx = 0.0;
            syy = 0.0;
            sx = 0.0;
            sy = 0.0;
            for (int k = 0; k < numcols; k++) {
                xj = vectorA[k];
                yj = vectorB[k];
                if (usedB[i][k] && usedB[j][k]) { /* this is a bit faster than calling Double.isNan */
                    sx += xj;
                    sy += yj;
                    sxy += xj * yj;
                    sxx += xj * xj;
                    syy += yj * yj;
                    numused++;
                }
            }

            // avoid -1 correlations or extremely noisy values (minNumUsed should be set high enough so that degrees
            // of freedom isn't too low.
            if (numused < this.minNumUsed)
                setCorrel(i, j, Double.NaN, 0);
            else {
                double denom = correlationNorm(numused, sxx, sx, syy, sy);
                if (denom <= 0.0) { // means variance is zero for one of the vectors.
                    setCorrel(i, j, 0.0, numused);
                } else {
                    double correl = (sxy - sx * sy / numused) / Math.sqrt(denom);

                    setCorrel(i, j, correl, numused);
                }
            }
            ++numComputed;

        }
        if ((i + 1) % 2000 == 0) {
            double t = timer.getTime() / 1000.0;
            log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA
                    + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : "")
                    + String.format(", time elapsed since last check: %.2f", t) + "s");
            timer.reset();
            timer.start();
        }
    }
    log.info(skipped + " rows skipped, where probe lacks a gene annotation");
    finishMetrics();
}

From source file:ubic.gemma.analysis.expression.coexpression.links.MatrixRowPairPearsonAnalysis.java

/**
 * Calculate a linear correlation matrix for a matrix. Use this if you know there are no missing values, or don't
 * care about NaNs. Rows that are not mapped to genes are skipped.
 * // w w w  .  j ava 2 s .  com
 * @param duplicates The map containing information about what items are the 'same' as other items; such are
 *        skipped.
 */
private void calculateMetricsFast() {
    int numrows = this.dataMatrix.rows();
    int numcols = this.dataMatrix.columns();
    boolean docalcs = this.needToCalculateMetrics();

    double[][] data = new double[][] {};
    if (docalcs) {
        rowStatistics();

        // Temporarily put the data in this matrix (performance)
        data = new double[numrows][numcols];
        for (int i = 0; i < numrows; i++) { // first vector
            for (int j = 0; j < numcols; j++) { // second vector
                data[i][j] = this.dataMatrix.get(i, j);
            }
        }
    }

    /*
     * For each vector, compare it to all other vectors, avoid repeating things; skip items that don't have genes
     * mapped to them.
     */
    StopWatch timer = new StopWatch();
    timer.start();
    ExpressionDataMatrixRowElement itemA = null;
    ExpressionDataMatrixRowElement itemB = null;
    double[] vectorA = null;
    int skipped = 0;
    int numComputed = 0;
    for (int i = 0; i < numrows; i++) {
        itemA = this.dataMatrix.getRowElement(i);
        if (!this.hasGene(itemA)) {
            skipped++;
            continue;
        }
        if (docalcs) {
            vectorA = data[i];
        }

        for (int j = i + 1; j < numrows; j++) {
            itemB = this.dataMatrix.getRowElement(j);
            if (!this.hasGene(itemB))
                continue;
            if (!docalcs || results.get(i, j) != 0.0) { // second pass over matrix. Don't calculate it
                // if we
                // already have it. Just do the requisite checks.
                keepCorrel(i, j, results.get(i, j), numcols);
                continue;
            }

            double[] vectorB = data[j];
            setCorrel(i, j, correlFast(vectorA, vectorB, i, j), numcols);
            ++numComputed;
        }
        if ((i + 1) % 2000 == 0) {
            double t = timer.getTime() / 1000.0;
            log.info((i + 1) + " rows done, " + numComputed + " correlations computed, last row was " + itemA
                    + " " + (keepers.size() > 0 ? keepers.size() + " scores retained" : "")
                    + String.format(", time elapsed since last check: %.2f", t) + "s");
            timer.reset();
            timer.start();
        }
    }
    log.info(skipped + " rows skipped, due to no gene association");
    finishMetrics();

}

From source file:ubic.gemma.analysis.expression.diff.GeneDifferentialExpressionServiceImpl.java

@Override
public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene, double threshold,
        Integer limit) {//  w ww . j  av a2 s  .c  o m

    StopWatch timer = new StopWatch();
    timer.start();

    Collection<DifferentialExpressionValueObject> devos = new ArrayList<DifferentialExpressionValueObject>();

    if (gene == null)
        return devos;

    if (timer.getTime() > 1000) {
        log.info("Find experiments: " + timer.getTime() + " ms");
    }
    timer.reset();
    timer.start();

    Map<BioAssaySet, List<DifferentialExpressionAnalysisResult>> results = differentialExpressionResultService
            .find(gene, threshold, limit);
    timer.stop();

    if (timer.getTime() > 1000) {
        log.info("Diff raw ex results: " + timer.getTime() + " ms");
    }

    return postProcessDiffExResults(gene, threshold, results);
}

From source file:ubic.gemma.analysis.expression.diff.LinearModelAnalyzer.java

/**
 * Linear models solved/*  w w w.  j  a va  2 s.  co m*/
 * 
 * @param designMatrix
 * @param data
 * @param rawResults
 * @param quantitationType
 * @return
 */
private Future<?> runAnalysisFuture(final DesignMatrix designMatrix, final DoubleMatrix<String, String> data,
        final Map<String, LinearModelSummary> rawResults, final QuantitationType quantitationType) {
    ExecutorService service = Executors.newSingleThreadExecutor();

    Future<?> f = service.submit(new Runnable() {
        @Override
        public void run() {
            StopWatch timer = new StopWatch();
            timer.start();
            LeastSquaresFit fit = null;
            if (quantitationType.getScale().equals(ScaleType.COUNT)) {
                log.info(" Performing weighted least squares regression on COUNT data ");
                MeanVarianceEstimator mv = new MeanVarianceEstimator(designMatrix, data);
                fit = new LeastSquaresFit(designMatrix, data, mv.getWeights());
            } else {
                fit = new LeastSquaresFit(designMatrix, data);
            }
            log.info("Model fit data matrix " + data.rows() + " x " + data.columns() + ": " + timer.getTime()
                    + "ms");
            timer.reset();
            timer.start();
            Map<String, LinearModelSummary> res = fit.summarizeByKeys(true);
            log.info("Model summarize/ANOVA: " + timer.getTime() + "ms");
            rawResults.putAll(res);
            log.info("Analysis phase done ...");
        }
    });

    service.shutdown();
    return f;
}