ubic.gemma.core.analysis.report.DatabaseViewGeneratorImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.analysis.report.DatabaseViewGeneratorImpl.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2009 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.analysis.report;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.gemma.core.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.model.analysis.expression.diff.ContrastResult;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult;
import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.util.EntityUtils;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.zip.GZIPOutputStream;

/**
 * Generates textual views of the database so other people can use the data.
 * Development of this was started due to the collaboration with NIF.
 * It is essential that these views be created by a principal with Anonymous status, so as not to create views of
 * private data (that could be done, but would be separate).
 *
 * @author paul
 */
@Component
public class DatabaseViewGeneratorImpl implements DatabaseViewGenerator {

    /**
     * Upper bound on the corrected p-value: differential expression results whose corrected p-value exceeds this
     * are left out of the diffex view.
     */
    private static final double THRESH_HOLD = 0.01;
    /** Base name (the view file suffix is appended later) of the dataset summary view file. */
    private static final String DATASET_SUMMARY_VIEW_BASENAME = "DatasetSummary";
    /** Base name (the view file suffix is appended later) of the dataset tissue view file. */
    private static final String DATASET_TISSUE_VIEW_BASENAME = "DatasetTissue";
    /** Base name (the view file suffix is appended later) of the differential expression view file. */
    private static final String DATASET_DIFFEX_VIEW_BASENAME = "DatasetDiffEx";
    private static final Log log = LogFactory.getLog(DatabaseViewGeneratorImpl.class);
    /** Used to load and thaw experiments and to look up their taxon and array designs. */
    @Autowired
    private ExpressionExperimentService expressionExperimentService;

    /** Used to look up the genes associated with a probe when formatting diffex results. */
    @Autowired
    private CompositeSequenceService compositeSequenceService;

    /** Used to fetch and thaw the differential expression analyses of an experiment. */
    @Autowired
    private DifferentialExpressionAnalysisService differentialExpressionAnalysisService;

    /** Used to thaw array designs so that their design provider can be read. */
    @Autowired
    private ArrayDesignService arrayDesignService;

    /**
     * Resolves a file name against {@link DatabaseViewGenerator#VIEW_DIR}.
     * When the resulting file does not exist yet, its parent directory is handed to
     * {@link EntityUtils#mkdirs(File)} (presumably creating it) before the handle is returned.
     *
     * @param filename name of the file, appended verbatim to the view directory path
     * @return handle to the (possibly not yet existing) file
     */
    @Override
    public File getOutputFile(String filename) {
        File target = new File(DatabaseViewGenerator.VIEW_DIR + filename);

        // An existing file is returned untouched; only a missing one needs its directory prepared.
        if (!target.exists()) {
            EntityUtils.mkdirs(target.getParentFile());
        }

        return target;
    }

    /**
     * Generates every view without restricting the number of experiments; equivalent to calling
     * {@link #runAll(Integer)} with {@code null}.
     */
    @Override
    public void runAll() {
        final Integer noLimit = null;
        this.runAll(noLimit);
    }

    /**
     * Loads all expression experiments and writes, in order, the dataset summary view, the dataset tissue view
     * and the differential expression view.
     *
     * @param limit passed through to each view generator; {@code null} (or a non-positive value) disables the
     *              limit
     * @throws RuntimeException wrapping any {@link IOException} raised while writing a view
     */
    @Override
    public void runAll(Integer limit) {
        final Collection<ExpressionExperiment> allExperiments = expressionExperimentService.loadAll();

        try {
            this.generateDatasetView(limit, allExperiments);
            this.generateDatasetTissueView(limit, allExperiments);
            this.generateDifferentialExpressionView(limit, allExperiments);
        } catch (IOException ioFailure) {
            // The interface declares no checked exception, so surface I/O problems unchecked (cause preserved).
            throw new RuntimeException(ioFailure);
        }
    }

    /**
     * Writes the dataset tissue view: a gzipped, tab-delimited file with the header
     * {@code GemmaDsId, Term, TermURI} and one line per experiment characteristic whose category is
     * "OrganismPart" and whose value is not blank.
     *
     * @param limit       maximum number of experiments to process; {@code null} or a non-positive value means no
     *                    limit
     * @param experiments experiments to scan; each one is thawed before its characteristics are read
     * @throws IOException if the view file cannot be written
     */
    private void generateDatasetTissueView(Integer limit, Collection<ExpressionExperiment> experiments)
            throws IOException {
        DatabaseViewGeneratorImpl.log.info("Generating dataset tissue view");

        File file = this.getViewFile(DatabaseViewGeneratorImpl.DATASET_TISSUE_VIEW_BASENAME);
        DatabaseViewGeneratorImpl.log.info("Writing to " + file);

        // Explicit UTF-8 so the file content does not depend on the platform default charset.
        try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)),
                StandardCharsets.UTF_8)) {

            writer.write("GemmaDsId\tTerm\tTermURI\n");

            int processed = 0;
            for (ExpressionExperiment ee : experiments) {
                ee = expressionExperimentService.thawLite(ee);

                DatabaseViewGeneratorImpl.log.info("Processing: " + ee.getShortName());

                Long gemmaId = ee.getId();

                for (Characteristic c : ee.getCharacteristics()) {

                    if (StringUtils.isBlank(c.getValue())) {
                        continue;
                    }

                    // Constant-first comparison: a characteristic without a category is skipped, not an NPE.
                    if (!"OrganismPart".equals(c.getCategory())) { // or tissue? check URI
                        continue;
                    }

                    String uri = StringUtils.isNotBlank(c.getValueUri()) ? c.getValueUri() : "";

                    writer.write(String.format("%d\t%s\t%s\n", gemmaId, c.getValue(), uri));
                }

                // Stop once exactly 'limit' experiments were processed ('>' would let one extra through).
                if (limit != null && limit > 0 && ++processed >= limit) {
                    break;
                }
            }
        }
    }

    /**
     * Writes the dataset summary view: a gzipped, tab-delimited file with one line per experiment holding its id,
     * source database, accession, short name, name, description, taxon common name and a comma-separated list of
     * the design provider names of the array designs it uses.
     * Experiments for which no taxon can be determined are skipped and do not count towards the limit.
     *
     * @param limit       maximum number of experiments to write; {@code null} or a non-positive value means no
     *                    limit
     * @param experiments experiments to summarize; each one is thawed before it is read
     * @throws IOException if the view file cannot be written
     */
    private void generateDatasetView(Integer limit, Collection<ExpressionExperiment> experiments)
            throws IOException {

        DatabaseViewGeneratorImpl.log.info("Generating dataset summary view");

        File file = this.getViewFile(DatabaseViewGeneratorImpl.DATASET_SUMMARY_VIEW_BASENAME);
        DatabaseViewGeneratorImpl.log.info("Writing to " + file);

        // Explicit UTF-8 so the file content does not depend on the platform default charset.
        try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)),
                StandardCharsets.UTF_8)) {

            writer.write("GemmaDsId\tSource\tSourceAccession\tShortName\tName\tDescription\ttaxon\tManufacturer\n");

            int written = 0;
            for (ExpressionExperiment ee : experiments) {
                ee = expressionExperimentService.thawLite(ee);
                DatabaseViewGeneratorImpl.log.info("Processing: " + ee.getShortName());

                String acc = "";
                String source = "";

                if (ee.getAccession() != null && ee.getAccession().getAccession() != null) {
                    acc = ee.getAccession().getAccession();
                    // An accession without an external database leaves the source column empty instead of failing.
                    if (ee.getAccession().getExternalDatabase() != null) {
                        source = ee.getAccession().getExternalDatabase().getName();
                    }
                }

                Long gemmaId = ee.getId();
                String shortName = ee.getShortName();
                String name = ee.getName();

                // A missing description becomes an empty column rather than the literal text "null"; tabs and line
                // breaks are flattened to spaces so the description cannot break the tab-delimited layout.
                String description = ee.getDescription() == null ? "" : ee.getDescription();
                description = StringUtils.replaceChars(description, "\t\n\r", "   ");

                Taxon taxon = expressionExperimentService.getTaxon(ee);

                if (taxon == null) {
                    continue;
                }

                Collection<ArrayDesign> ads = expressionExperimentService.getArrayDesignsUsed(ee);
                StringBuilder manufacturers = new StringBuilder();

                // TODO could cache the arrayDesigns to make faster, thawing ad is time consuming
                for (ArrayDesign ad : ads) {
                    ad = arrayDesignService.thawLite(ad);
                    if (ad.getDesignProvider() == null) {
                        DatabaseViewGeneratorImpl.log.debug("Array Design: " + ad.getShortName()
                                + " has no design provoider assoicated with it. Skipping");
                        continue;
                    }
                    manufacturers.append(ad.getDesignProvider().getName()).append(",");
                }

                writer.write(String.format("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", gemmaId, source, acc, shortName,
                        name, description, taxon.getCommonName(),
                        StringUtils.removeEnd(manufacturers.toString(), ",")));

                // Stop once exactly 'limit' experiments were written ('>' would let one extra through).
                if (limit != null && limit > 0 && ++written >= limit) {
                    break;
                }
            }
        }
    }

    /**
     * Writes the differential expression view: a gzipped, tab-delimited file with one line per contrast of every
     * result whose corrected p-value is at most {@link #THRESH_HOLD}.
     * Result sets without a baseline group, or whose baseline belongs to a batch factor, are skipped. Experiments
     * without any differential expression analysis are skipped and do not count towards the limit.
     *
     * @param limit       maximum number of experiments to process; {@code null} or a non-positive value means no
     *                    limit
     * @param experiments experiments to export; each one is thawed before it is read
     * @throws IOException if the view file cannot be written
     */
    private void generateDifferentialExpressionView(Integer limit, Collection<ExpressionExperiment> experiments)
            throws IOException {
        DatabaseViewGeneratorImpl.log.info("Generating dataset diffex view");

        File file = this.getViewFile(DatabaseViewGeneratorImpl.DATASET_DIFFEX_VIEW_BASENAME);
        DatabaseViewGeneratorImpl.log.info("Writing to " + file);

        // Explicit UTF-8 so the file content does not depend on the platform default charset.
        try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)),
                StandardCharsets.UTF_8)) {

            /*
             * For each gene that is differentially expressed, print out a line per contrast
             */
            writer.write(
                    "GemmaDsId\tEEShortName\tGeneNCBIId\tGemmaGeneId\tFactor\tFactorURI\tBaseline\tContrasting\tDirection\n");

            int processed = 0;
            for (ExpressionExperiment ee : experiments) {
                ee = expressionExperimentService.thawLite(ee);

                Collection<DifferentialExpressionAnalysis> results = differentialExpressionAnalysisService
                        .getAnalyses(ee);
                if (results == null || results.isEmpty()) {
                    DatabaseViewGeneratorImpl.log.warn("No differential expression results found for " + ee);
                    continue;
                }

                // FIXME: experiments with more than one analysis should probably be skipped for this purpose;
                // for now every analysis is exported.

                DatabaseViewGeneratorImpl.log.info("Processing: " + ee.getShortName());

                for (DifferentialExpressionAnalysis analysis : results) {

                    analysis = this.differentialExpressionAnalysisService.thawFully(analysis);

                    for (ExpressionAnalysisResultSet ears : analysis.getResultSets()) {

                        FactorValue baselineGroup = ears.getBaselineGroup();

                        if (baselineGroup == null) {
                            // No baseline defined, e.g. for an interaction term.
                            continue;
                        }

                        if (ExperimentalDesignUtils.isBatch(baselineGroup.getExperimentalFactor())) {
                            continue;
                        }

                        String baselineDescription = ExperimentalDesignUtils.prettyString(baselineGroup);

                        // Collect the factor names and category URIs as comma-separated lists.
                        StringBuilder factorNames = new StringBuilder();
                        StringBuilder factorUris = new StringBuilder();

                        for (ExperimentalFactor ef : ears.getExperimentalFactors()) {
                            factorNames.append(ef.getName()).append(",");
                            // A factor without a category simply contributes no URI.
                            if (ef.getCategory() != null
                                    && StringUtils.isNotBlank(ef.getCategory().getCategoryUri())) {
                                factorUris.append(ef.getCategory().getCategoryUri()).append(",");
                            }
                        }
                        String factorName = StringUtils.removeEnd(factorNames.toString(), ",");
                        String factorURI = StringUtils.removeEnd(factorUris.toString(), ",");

                        if (ears.getResults() == null || ears.getResults().isEmpty()) {
                            DatabaseViewGeneratorImpl.log
                                    .warn("No  differential expression analysis results found for " + ee);
                            continue;
                        }

                        // Generate probe details
                        for (DifferentialExpressionAnalysisResult dear : ears.getResults()) {

                            if (dear == null) {
                                DatabaseViewGeneratorImpl.log
                                        .warn("Missing results for " + ee + " skipping to next. ");
                                continue;
                            }

                            // Only results passing the corrected p-value threshold are exported.
                            if (dear.getCorrectedPvalue() == null
                                    || dear.getCorrectedPvalue() > DatabaseViewGeneratorImpl.THRESH_HOLD) {
                                continue;
                            }

                            String formatted = this.formatDiffExResult(ee, dear, factorName, factorURI,
                                    baselineDescription);

                            // formatDiffExResult yields null/blank when the probe cannot be reported.
                            if (StringUtils.isNotBlank(formatted)) {
                                writer.write(formatted);
                            }

                        } // dear loop
                    } // ears loop
                } // analysis loop

                // Stop once exactly 'limit' experiments were processed ('>' would let one extra through).
                if (limit != null && limit > 0 && ++processed >= limit) {
                    break;
                }

            } // EE loop
        }
    }

    /**
     * Formats one differential expression result as tab-delimited lines for the diffex view, one line per
     * contrast.
     * Only probes that map to exactly one gene with an NCBI gene id are reported. Contrasts whose log fold change
     * is missing or NaN are left out because no direction can be derived for them.
     *
     * @param ee                  experiment the result belongs to (supplies id and short name)
     * @param probeAnalysisResult result to format
     * @param factorName          comma-separated factor names, written verbatim
     * @param factorURI           comma-separated factor category URIs, written verbatim
     * @param baselineDescription description of the baseline group, written verbatim
     * @return the formatted lines (possibly an empty string when no contrast is reportable), or {@code null} when
     *         the probe does not map to exactly one gene with an NCBI id or the result has no contrasts
     */
    private String formatDiffExResult(ExpressionExperiment ee,
            DifferentialExpressionAnalysisResult probeAnalysisResult, String factorName, String factorURI,
            String baselineDescription) {

        CompositeSequence cs = probeAnalysisResult.getProbe();

        Collection<Gene> genes = compositeSequenceService.getGenes(cs);

        // Ambiguous (multi-gene) and unmapped probes are not reported.
        if (genes == null || genes.size() != 1) {
            return null;
        }

        Gene g = genes.iterator().next();

        if (g == null || g.getNcbiGeneId() == null) {
            return null;
        }

        Collection<ContrastResult> contrasts = probeAnalysisResult.getContrasts();
        if (contrasts == null) {
            return null;
        }

        StringBuilder buf = new StringBuilder();
        for (ContrastResult cr : contrasts) {
            // Read into a boxed local so a missing value is detected instead of failing on unboxing.
            Double logFoldChange = cr.getLogFoldChange();
            if (logFoldChange == null || logFoldChange.isNaN()) {
                continue;
            }

            String direction = logFoldChange < 0 ? "-" : "+";

            FactorValue factorValue = cr.getFactorValue();
            String factorValueDescription = ExperimentalDesignUtils.prettyString(factorValue);

            buf.append(String.format("%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n", ee.getId(), ee.getShortName(),
                    g.getNcbiGeneId().toString(), g.getId(), factorName, factorURI, baselineDescription,
                    factorValueDescription, direction));
        }

        return buf.toString();
    }

    /**
     * Resolves the output file of a view from its base name by appending
     * {@link DatabaseViewGenerator#VIEW_FILE_SUFFIX} and delegating to {@link #getOutputFile(String)}.
     *
     * @param datasetDiffexViewBasename base name of the view; despite the parameter name, this method is used for
     *                                  every view, not only the diffex one
     * @return handle to the view file
     */
    private File getViewFile(String datasetDiffexViewBasename) {
        String viewFileName = datasetDiffexViewBasename + DatabaseViewGenerator.VIEW_FILE_SUFFIX;
        return this.getOutputFile(viewFileName);
    }

}