ubic.gemma.core.apps.DifferentialExpressionAnalysisCli.java Source code

Introduction

Here is the source code for ubic.gemma.core.apps.DifferentialExpressionAnalysisCli.java
Source

/*
 * The Gemma project
 *
 * Copyright (c) 2006-2011 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.apps;

import gemma.gsec.SecurityService;
import org.apache.commons.cli.Option;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalysisConfig;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerService;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerServiceImpl.AnalysisType;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationServiceImpl;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
import ubic.gemma.core.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.core.util.AbstractCLI;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.common.auditAndSecurity.eventType.DifferentialExpressionAnalysisEvent;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService;
import ubic.gemma.persistence.service.expression.experiment.ExperimentalFactorService;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

/**
 * A command line interface to the {@link DifferentialExpressionAnalysis}.
 *
 * @author keshav
 */
public class DifferentialExpressionAnalysisCli extends ExpressionExperimentManipulatingCLI {

    /** Numeric factor IDs parsed from the {@code -factors} option. */
    private final List<Long> factorIds = new ArrayList<>();
    /**
     * Non-numeric tokens from the {@code -factors} option; matched against factor names or categories with spaces
     * replaced by underscores.
     */
    private final List<String> factorNames = new ArrayList<>();
    /** Service used to run, redo and delete analyses; looked up in {@link #processOptions()}. */
    private DifferentialExpressionAnalyzerService differentialExpressionAnalyzerService = null;
    /**
     * Whether batch factors are left out of the analysis. True by default; the {@code -usebatch} option sets it to
     * false so that batch factors (if they exist) are used.
     */
    private boolean ignoreBatch = true;
    /** Set by {@code -delete}: remove the existing analyses instead of running new ones. */
    private boolean delete = false;
    /** Service used to find the existing analyses of an experiment (needed for {@code -redo}). */
    private DifferentialExpressionAnalysisService differentialExpressionAnalysisService;
    /** Service used to write result archive files when results are not persisted ({@code -nodb}). */
    private ExpressionDataFileService expressionDataFileService;
    /** Subset factor given as a numeric ID via {@code -subset}; null if none or if a name was given. */
    private Long subsetFactorId;
    /** Subset factor given by name or category via {@code -subset}; null if none or if an ID was given. */
    private String subsetFactorName;
    /** Set by {@code -redo}: base the analysis on the experiment's previous analyses. */
    private boolean tryToCopyOld = false;
    /*
     * Analysis type requested with -type; null when not specified. Used when processing a single experiment.
     */
    private AnalysisType type = null;

    /**
     * Use moderated statistics. Switched on by {@code -ebayes}; otherwise the configuration default applies.
     */
    private boolean ebayes = DifferentialExpressionAnalysisConfig.DEFAULT_EBAYES;

    /** Whether results are passed to the analyzer with persistence enabled; {@code -nodb} sets it to false. */
    private boolean persist = true;

    /**
     * Command-line entry point: creates the CLI instance and hands it, together with the raw arguments, to
     * {@code executeCommand}.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        executeCommand(new DifferentialExpressionAnalysisCli(), args);
    }

    /**
     * @return the name of this command, {@code diffExAnalyze}
     */
    @Override
    public String getCommandName() {
        return "diffExAnalyze";
    }

    /**
     * Parses the command line and then processes every selected expression experiment in turn.
     * <p>
     * Entries that are not {@link ExpressionExperiment}s are skipped. When more than one experiment was selected,
     * experiments not owned by the current user are skipped with a warning.
     *
     * @param args command-line arguments
     * @return the error produced while processing the command line, or null once all experiments were handled
     */
    @Override
    protected Exception doWork(String[] args) {
        Exception commandLineError = this.processCommandLine(args);
        if (commandLineError != null) {
            return commandLineError;
        }

        SecurityService security = this.getBean(SecurityService.class);
        boolean severalExperiments = this.expressionExperiments.size() > 1;

        for (BioAssaySet candidate : expressionExperiments) {
            if (candidate instanceof ExpressionExperiment) {
                if (severalExperiments) {
                    AbstractCLI.log.info(">>>>>> Begin processing: " + candidate);
                }

                AbstractCLI.log.debug(security.getOwner(candidate));

                // The ownership check is really only important when running as admin and in a batch mode.
                boolean ownedByCurrentUser = security.isOwnedByCurrentUser(candidate);
                if (ownedByCurrentUser || !severalExperiments) {
                    this.processExperiment((ExpressionExperiment) candidate);
                } else {
                    AbstractCLI.log.warn("Experiment is not owned by current user, skipping: " + candidate);
                }
            }
        }

        this.summarizeProcessing();

        return null;
    }

    /**
     * @return a one-line description of what this command does
     */
    @Override
    public String getShortDesc() {
        return "Analyze expression data sets for differentially expressed genes.";
    }

    /**
     * Registers the command-line options of this tool on top of those provided by the super class.
     * <p>
     * Options that carry a value ({@code -factors}, {@code -subset}, {@code -type}) are declared with
     * {@code hasArg()}; without it the parser does not store a value and {@code getOptionValue} returns null, so
     * the option would be silently ignored by {@link #processOptions()}.
     */
    @SuppressWarnings("static-access")
    @Override
    protected void buildOptions() {

        /*
         * These options from the super class support: running on one or more data sets from the command line, running
         * on list of data sets from a file, running on all data sets.
         */
        super.buildOptions();

        /* Supports: running on all data sets that have not been run since a given date. */
        super.addDateOption();

        super.addAutoOption();
        this.autoSeekEventType = DifferentialExpressionAnalysisEvent.class;
        super.addForceOption();

        // Takes a value: the comma-delimited list of factors.
        Option factors = Option.builder("factors").hasArg().argName("factors").desc(
                "ID numbers, categories or names of the factor(s) to use, comma-delimited, with spaces replaced by underscores")
                .build();

        super.addOption(factors);

        // Takes a value: the single factor used for subsetting.
        Option subsetFactor = Option.builder("subset").hasArg().argName("factor").desc(
                "ID number, category or name of the factor to use for subsetting the analysis; must also use with -factors")
                .build();
        super.addOption(subsetFactor);

        Option analysisType = Option.builder("type").hasArg().desc(
                "Type of analysis to perform. If omitted, the system will try to guess based on the experimental design. "
                        + "Choices are : TWO_WAY_ANOVA_WITH_INTERACTION, "
                        + "TWO_WAY_ANOVA_NO_INTERACTION , OWA (one-way ANOVA), TTEST, OSTTEST (one-sample t-test),"
                        + " GENERICLM (generic LM, no interactions); default: auto-detect")
                .build();

        super.addOption(analysisType);

        // Flag only: its presence turns batch-ignoring off.
        Option useBatchOption = Option.builder("usebatch").desc(
                "If a 'batch' factor is available, use it. Otherwise, batch information can/will be ignored in the analysis.")
                .build();

        super.addOption(useBatchOption);

        super.addOption("nodb", "Output files only to your gemma.appdata.home instead of database");

        super.addOption("redo", "If using automatic analysis "
                + "try to base analysis on previous analyses. Will re-run all analyses for the experiment");

        super.addOption("delete",
                "Instead of running the analysis on the given experiments, remove the old analyses. Use with care!");

        super.addOption("ebayes", "Use empirical-Bayes moderated statistics. Default: "
                + DifferentialExpressionAnalysisConfig.DEFAULT_EBAYES);

    }

    /**
     * Reads the parsed command-line options into the fields of this object and looks up the required services.
     * <p>
     * {@code -type}, {@code -subset} and {@code -factors} are only accepted when a single experiment is being
     * analyzed; {@code -type} and {@code -subset} additionally require {@code -factors}, and {@code -factors}
     * cannot be combined with {@code -redo}. Values that parse as a number are treated as factor IDs, anything
     * else as a factor name or category. Surrounding whitespace is removed from each value first, so that
     * {@code "1, 2"} yields two IDs rather than an ID and the name {@code " 2"}.
     *
     * @throws IllegalArgumentException if the options are combined in an unsupported way or the analysis type is
     *                                  not a known {@link AnalysisType}
     */
    @Override
    protected void processOptions() {
        super.processOptions();
        differentialExpressionAnalyzerService = this.getBean(DifferentialExpressionAnalyzerService.class);
        differentialExpressionAnalysisService = this.getBean(DifferentialExpressionAnalysisService.class);
        expressionDataFileService = this.getBean(ExpressionDataFileService.class);
        if (this.hasOption("type")) {

            if (this.expressionExperiments.size() > 1) {
                throw new IllegalArgumentException(
                        "You can only specify the analysis type when analyzing a single experiment");
            }

            if (!this.hasOption("factors")) {
                throw new IllegalArgumentException(
                        "Please specify the factor(s) when specifying the analysis type.");
            }

            String requestedType = StringUtils.trimToEmpty(this.getOptionValue("type"));
            try {
                this.type = AnalysisType.valueOf(requestedType);
            } catch (IllegalArgumentException e) {
                // Same exception type as before, but tell the user what would have been accepted.
                throw new IllegalArgumentException("Unknown analysis type '" + requestedType
                        + "'; valid values are: " + StringUtils.join(AnalysisType.values(), ", "), e);
            }
        }

        if (this.hasOption("subset")) {
            if (this.expressionExperiments.size() > 1) {
                throw new IllegalArgumentException(
                        "You can only specify the subset factor when analyzing a single experiment");
            }

            if (!this.hasOption("factors")) {
                throw new IllegalArgumentException(
                        "You have to specify the factors if you also specify the subset");
            }

            String subsetFactor = StringUtils.trim(this.getOptionValue("subset"));
            try {
                this.subsetFactorId = Long.parseLong(subsetFactor);
            } catch (NumberFormatException notAnId) {
                // Not a number: treat the value as a factor name or category.
                this.subsetFactorName = subsetFactor;
            }
        }

        if (this.hasOption("usebatch")) {
            this.ignoreBatch = false;
        }

        if (this.hasOption("delete")) {
            this.delete = true;
        }

        if (this.hasOption("ebayes")) {
            this.ebayes = true;
        }

        if (this.hasOption("nodb")) {
            this.persist = false;
        }

        this.tryToCopyOld = this.hasOption("redo");

        if (this.hasOption("factors")) {

            if (this.tryToCopyOld) {
                throw new IllegalArgumentException("You can't specify 'redo' and 'factors' together");
            }

            if (this.expressionExperiments.size() > 1) {
                throw new IllegalArgumentException(
                        "You can only specify the factors when analyzing a single experiment");
            }

            String rawFactors = this.getOptionValue("factors");
            String[] tokens = StringUtils.split(rawFactors, ",");
            if (tokens != null) {
                for (String token : tokens) {
                    String factor = token.trim();
                    if (factor.isEmpty()) {
                        // e.g. a stray blank between two commas
                        continue;
                    }
                    try {
                        this.factorIds.add(Long.parseLong(factor));
                    } catch (NumberFormatException notAnId) {
                        // Not a number: treat the token as a factor name or category.
                        this.factorNames.add(factor);
                    }
                }
            }
        }
    }

    /**
     * Runs the differential expression analysis for one experiment, or deletes its analyses when {@code -delete}
     * was given.
     * <p>
     * Three modes are supported:
     * <ul>
     * <li>manual: factors (and optionally a subset factor and analysis type) were given on the command line;</li>
     * <li>redo: no factors were given and {@code -redo} was requested, so each previous analysis of the experiment
     * is redone exactly once and those results are used;</li>
     * <li>automatic: all factors of the design are used (batch factors excluded unless {@code -usebatch}), which is
     * only attempted for at most three factors.</li>
     * </ul>
     * Any exception is caught here, logged, and recorded in {@code errorObjects}; successes are recorded in
     * {@code successObjects}. Nothing is propagated to the caller, so that a failure of one experiment does not stop
     * the processing of the others.
     *
     * @param ee the experiment to process; it is thawed before use
     */
    private void processExperiment(ExpressionExperiment ee) {
        Collection<DifferentialExpressionAnalysis> results;
        DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();

        try {

            ee = this.eeService.thawLite(ee);

            if (delete) {
                AbstractCLI.log.info("Deleting any analyses for experiment=" + ee);
                differentialExpressionAnalyzerService.deleteAnalyses(ee);
                successObjects.add("Deleted analysis for: " + ee.toString());
                return;
            }

            // Treat a missing design like an empty one instead of failing with a NullPointerException.
            Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign() == null ? null
                    : ee.getExperimentalDesign().getExperimentalFactors();
            if (experimentalFactors == null || experimentalFactors.isEmpty()) {
                if (this.expressionExperiments.size() == 1) {
                    /*
                     * Only need to be noisy if this is the only ee. Batch processing should be less so.
                     */
                    throw new RuntimeException(
                            "Experiment does not have an experimental design populated: " + ee.getShortName());
                }
                AbstractCLI.log
                        .warn("Experiment does not have an experimental design populated: " + ee.getShortName());
                return;
            }

            Collection<ExperimentalFactor> factors = this.guessFactors(ee);

            if (factors.size() > 0) {
                /*
                 * Manual selection of factors
                 */
                ExperimentalFactor subsetFactor = this.getSubsetFactor(ee);

                AbstractCLI.log.info("Using " + factors.size() + " factors provided as arguments");

                if (subsetFactor != null) {
                    if (factors.contains(subsetFactor)) {
                        throw new IllegalArgumentException(
                                "Subset factor cannot also be included as factor to analyze");
                    }
                    AbstractCLI.log.info("Subsetting by " + subsetFactor);

                }

                config.setAnalysisType(this.type);
                config.setFactorsToInclude(factors);
                config.setSubsetFactor(subsetFactor);
                config.setModerateStatistics(this.ebayes);
                config.setPersist(this.persist);
                boolean rnaSeq = super.eeService.isRNASeq(ee);
                config.setUseWeights(rnaSeq);
                /*
                 * Interactions included by default. It's actually only complicated if there is a subset factor.
                 */
                if (type == null && factors.size() == 2) {
                    config.getInteractionsToInclude().add(factors);
                }

                results = this.differentialExpressionAnalyzerService.runDifferentialExpressionAnalyses(ee, config);

            } else if (tryToCopyOld) {
                /*
                 * Base the analysis on the previous ones. This is done exactly once and its results are the results
                 * of this run; no additional automatic analysis is started afterwards.
                 */
                results = this.tryToRedoBasedOnOldAnalysis(ee);

            } else {
                /*
                 * Automatically
                 */
                Collection<ExperimentalFactor> factorsToUse = new HashSet<>();

                if (this.ignoreBatch) {
                    for (ExperimentalFactor ef : experimentalFactors) {
                        if (!ExperimentalDesignUtils.isBatch(ef)) {
                            factorsToUse.add(ef);
                        }
                    }
                } else {
                    factorsToUse.addAll(experimentalFactors);
                }

                if (factorsToUse.isEmpty()) {
                    throw new RuntimeException("No factors available for " + ee.getShortName());
                }

                if (factorsToUse.size() > 3) {
                    throw new RuntimeException("Experiment has too many factors to run automatically: "
                            + ee.getShortName()
                            + "; try using the -redo flag to base it on an old analysis, or select factors manually");
                }

                config.setFactorsToInclude(factorsToUse);
                config.setPersist(this.persist);
                config.setModerateStatistics(this.ebayes);

                if (factorsToUse.size() == 2) {
                    // include interactions by default
                    config.addInteractionToInclude(factorsToUse);
                }

                boolean rnaSeq = super.eeService.isRNASeq(ee);
                config.setUseWeights(rnaSeq);

                results = this.differentialExpressionAnalyzerService.runDifferentialExpressionAnalyses(ee, config);
            }

            if (results == null) {
                throw new IllegalStateException(
                        "Failed to process differential expression for experiment " + ee.getShortName());
            }

            if (!this.persist) {
                AbstractCLI.log.info("Writing results to disk");
                for (DifferentialExpressionAnalysis r : results) {
                    expressionDataFileService.writeDiffExArchiveFile(ee, r, config);
                }
            }

            successObjects.add(ee.toString());

        } catch (Exception e) {
            // Boundary for a single experiment: record the failure and let the caller move on to the next one.
            AbstractCLI.log.error("Error while processing " + ee + ": " + e.getMessage());
            ExceptionUtils.printRootCauseStackTrace(e);
            errorObjects.add(ee + ": " + e.getMessage());
        }

    }

    /**
     * Resolves the subset factor requested on the command line, if any.
     * <p>
     * A name is compared against each factor's name and category with spaces replaced by underscores; batch factors
     * are not considered while batch information is being ignored. If several factors match, the last one
     * encountered is used. A factor given by ID is loaded, thawed and must belong to the experimental design of
     * {@code ee}, the same requirement that {@link #guessFactors(ExpressionExperiment)} applies to the factors to
     * analyze.
     *
     * @param ee the experiment being analyzed
     * @return the subset factor, or null if no subset was requested
     * @throws IllegalArgumentException if the requested factor cannot be found or does not belong to {@code ee}
     */
    private ExperimentalFactor getSubsetFactor(ExpressionExperiment ee) {
        if (StringUtils.isNotBlank(this.subsetFactorName)) {
            ExperimentalFactor subsetFactor = null;
            for (ExperimentalFactor experimentalFactor : ee.getExperimentalDesign().getExperimentalFactors()) {

                if (ignoreBatch && BatchInfoPopulationServiceImpl.isBatchFactor(experimentalFactor)) {
                    AbstractCLI.log.info("Ignoring batch factor:" + experimentalFactor);
                    continue;
                }

                // has already implemented way of figuring out human-friendly name of factor value.
                ExperimentalFactorValueObject fvo = new ExperimentalFactorValueObject(experimentalFactor);

                if (subsetFactorName.equals(experimentalFactor.getName().replace(' ', '_'))) {
                    subsetFactor = experimentalFactor;
                } else if (fvo.getCategory() != null
                        && subsetFactorName.equals(fvo.getCategory().replace(' ', '_'))) {
                    subsetFactor = experimentalFactor;
                }
            }

            if (subsetFactor == null) {
                throw new IllegalArgumentException("Didn't find factor for provided subset factor name");
            }

            return subsetFactor;
        }

        if (this.subsetFactorId != null) {
            // The service is only needed when the factor has to be loaded by ID.
            ExperimentalFactorService efs = this.getBean(ExperimentalFactorService.class);
            ExperimentalFactor subsetFactor = efs.load(subsetFactorId);
            if (subsetFactor != null) {
                // thawed so that its experimental design can be compared below
                subsetFactor = efs.thaw(subsetFactor);
            }
            if (subsetFactor == null) {
                throw new IllegalArgumentException("No factor for id=" + subsetFactorId);
            }
            if (!subsetFactor.getExperimentalDesign().equals(ee.getExperimentalDesign())) {
                throw new IllegalArgumentException(
                        "Subset factor with id=" + subsetFactorId + " does not belong to " + ee);
            }
            return subsetFactor;
        }

        return null;
    }

    /**
     * Determine which factors to use if given from the command line. Only applicable if analysis is on a single data
     * set.
     * <p>
     * Factors can be given either by name/category or by ID, but not as a mixture. Names are compared against each
     * factor's name and category with spaces replaced by underscores. Factors given by ID must belong to the
     * experimental design of {@code ee}. While batch information is being ignored, batch factors are skipped even if
     * they were requested.
     *
     * @param ee the experiment being analyzed
     * @return the selected factors; empty if none were given on the command line
     * @throws IllegalArgumentException if names and IDs are mixed, if the number of factors matched by name differs
     *                                  from the number of names given, if an ID does not resolve to a factor, or if
     *                                  the factor belongs to a different experiment
     */
    private Collection<ExperimentalFactor> guessFactors(ExpressionExperiment ee) {
        Collection<ExperimentalFactor> factors = new HashSet<>();

        // Checked once up front; this covers both branches below.
        if (!this.factorNames.isEmpty() && !this.factorIds.isEmpty()) {
            throw new IllegalArgumentException("Please provide factor names or ids, not a mixture of each");
        }

        if (!this.factorNames.isEmpty()) {
            for (ExperimentalFactor experimentalFactor : ee.getExperimentalDesign().getExperimentalFactors()) {

                if (ignoreBatch && BatchInfoPopulationServiceImpl.isBatchFactor(experimentalFactor)) {
                    AbstractCLI.log.info("Ignoring batch factor:" + experimentalFactor);
                    continue;
                }

                // has already implemented way of figuring out human-friendly name of factor value.
                ExperimentalFactorValueObject fvo = new ExperimentalFactorValueObject(experimentalFactor);

                if (factorNames.contains(experimentalFactor.getName().replace(' ', '_'))) {
                    factors.add(experimentalFactor);
                } else if (fvo.getCategory() != null
                        && factorNames.contains(fvo.getCategory().replace(' ', '_'))) {
                    factors.add(experimentalFactor);
                }
            }

            // NOTE(review): this compares counts only, so a name matching several factors is reported as an error too.
            if (factors.size() != factorNames.size()) {
                throw new IllegalArgumentException("Didn't find factors for all the provided factor names");
            }

        } else if (!this.factorIds.isEmpty()) {
            // The service is only needed when factors have to be loaded by ID.
            ExperimentalFactorService efs = this.getBean(ExperimentalFactorService.class);
            for (Long factorId : factorIds) {
                ExperimentalFactor factor = efs.load(factorId);
                if (factor != null) {
                    // Only thaw what was actually found, so a bad ID is reported below however thaw treats null.
                    factor = efs.thaw(factor);
                }
                if (factor == null) {
                    throw new IllegalArgumentException("No factor for id=" + factorId);
                }
                if (!factor.getExperimentalDesign().equals(ee.getExperimentalDesign())) {
                    throw new IllegalArgumentException("Factor with id=" + factorId + " does not belong to " + ee);
                }

                if (ignoreBatch && BatchInfoPopulationServiceImpl.isBatchFactor(factor)) {
                    AbstractCLI.log.warn(
                            "Selected factor looks like a batch, and 'ignoreBatch' is true, skipping:" + factor);
                    continue;
                }

                factors.add(factor);
            }
        }

        return factors;
    }

    /**
     * Redoes every analysis already stored for the experiment and collects the outcomes.
     *
     * @param ee the experiment whose previous analyses serve as templates
     * @return the analyses produced by redoing each previous analysis
     * @throws IllegalArgumentException if the experiment has no previous analyses
     */
    private Collection<DifferentialExpressionAnalysis> tryToRedoBasedOnOldAnalysis(ExpressionExperiment ee) {
        Collection<DifferentialExpressionAnalysis> previous = differentialExpressionAnalysisService
                .findByInvestigation(ee);

        if (previous.isEmpty()) {
            throw new IllegalArgumentException("There are no old analyses to redo");
        }

        int howMany = previous.size();
        AbstractCLI.log.info("Will attempt to redo " + howMany + " analyses for " + ee);

        Collection<DifferentialExpressionAnalysis> redone = new HashSet<>();
        for (DifferentialExpressionAnalysis template : previous) {
            Collection<DifferentialExpressionAnalysis> outcome = this.differentialExpressionAnalyzerService
                    .redoAnalysis(ee, template, this.persist);
            redone.addAll(outcome);
        }
        return redone;
    }

}