ubic.gemma.apps.LoadSimpleExpressionDataCli.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.apps.LoadSimpleExpressionDataCli.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2006 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.gemma.apps;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.loader.expression.simple.SimpleExpressionDataLoaderService;
import ubic.gemma.loader.expression.simple.model.SimpleExpressionExperimentMetaData;
import ubic.gemma.model.common.quantitationtype.GeneralType;
import ubic.gemma.model.common.quantitationtype.ScaleType;
import ubic.gemma.model.common.quantitationtype.StandardQuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.util.AbstractCLIContextCLI;

import java.io.*;
import java.util.Collection;
import java.util.HashSet;

/**
 * Command line tool for loading expression experiments that are described in a flat "list" file.
 * <p>
 * The list file (option <code>-f</code>) is read line by line from the data directory (option <code>-d</code>,
 * default <code>./</code>). Blank lines and lines starting with <code>#</code> are skipped. Every other line
 * describes one experiment and must split on single tab characters into exactly {@link #TOTALFIELDS} fields, in
 * this order:
 * <ol>
 * <li>experiment name</li>
 * <li>experiment short name</li>
 * <li>experiment description</li>
 * <li>short name(s) of existing array design(s), separated by <code>+</code>; may be blank, or <code>IMAGE</code></li>
 * <li>data file name, relative to the data directory</li>
 * <li>scientific name of the taxon</li>
 * <li>quantitation type name</li>
 * <li>quantitation type description</li>
 * <li>quantitation type (parsed by {@link StandardQuantitationType#fromString(String)})</li>
 * <li>quantitation scale (parsed by {@link ScaleType#fromString(String)})</li>
 * <li>PubMed id (optional, integer)</li>
 * <li>source URL</li>
 * <li>name of a new array design (required when no existing short name is given)</li>
 * <li>technology type (parsed by {@link TechnologyType#fromString(String)})</li>
 * </ol>
 * A failure on one line is logged and recorded; processing continues with the next line.
 * 
 * @author xiangwan
 * @version $Id: LoadSimpleExpressionDataCli.java,v 1.21 2013/02/18 18:36:47 anton Exp $
 */
public class LoadSimpleExpressionDataCli extends AbstractCLIContextCLI {

    /** Name of the list file (option 'f'), resolved against {@link #dirName}. */
    private String fileName = null;
    /** Directory holding the list file and the data files it refers to (option 'd'). */
    private String dirName = "./";
    private SimpleExpressionDataLoaderService eeLoaderService = null;
    private ArrayDesignService adService = null;
    private TaxonService taxonService = null;
    ExpressionExperimentService eeService;

    /** Regular expression used to split a configuration line: exactly one tab character. */
    final static String SPLITCHAR = "\t{1}";

    // Zero-based positions of the fields within a configuration line.
    final static int NAMEI = 0;
    final static int SHORTNAMEI = NAMEI + 1;
    final static int DESCRIPTIONI = SHORTNAMEI + 1;
    final static int AD_SHORT_NAME_I = DESCRIPTIONI + 1; // The short name of the arrayDesign
    final static int DATAFILEI = AD_SHORT_NAME_I + 1;
    final static int SPECIESI = DATAFILEI + 1;
    final static int QNAMEI = SPECIESI + 1;
    final static int QDESCRIPTIONI = QNAMEI + 1;
    final static int QTYPEI = QDESCRIPTIONI + 1;
    final static int QSCALEI = QTYPEI + 1;
    final static int PUBMEDI = QSCALEI + 1;
    final static int SOURCEI = PUBMEDI + 1;
    final static int ARRAYDESIGNNAMEI = SOURCEI + 1;
    final static int TECHNOLOGYTYPEI = ARRAYDESIGNNAMEI + 1;
    /** Number of fields every configuration line must have. */
    final static int TOTALFIELDS = TECHNOLOGYTYPEI + 1;

    /**
     * Runs the loader and logs the elapsed time in milliseconds.
     * 
     * @param args command line arguments
     */
    public static void main(String[] args) {
        LoadSimpleExpressionDataCli cli = new LoadSimpleExpressionDataCli();
        StopWatch timer = new StopWatch();
        timer.start();
        try {
            Exception failure = cli.doWork(args);
            if (failure != null) {
                // Report through the logger, like the catch block below, instead of printing to stderr.
                log.fatal(failure, failure);
            }
            timer.stop();
            log.info(timer.getTime());
        } catch (Exception e) {
            log.fatal(e, e);
            throw new RuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.util.AbstractCLI#buildOptions()
     */
    @SuppressWarnings("static-access")
    @Override
    protected void buildOptions() {
        // Required: the list file describing the experiments to load.
        Option fileOption = OptionBuilder.isRequired().hasArg().withArgName("File Name")
                .withDescription("the list of experiments in flat file").withLongOpt("file").create('f');
        addOption(fileOption);

        // Optional: the directory against which the list file and data files are resolved.
        Option dirOption = OptionBuilder.hasArg().withArgName("File Folder")
                .withDescription("The folder for containing the experiment files").withLongOpt("dir").create('d');
        addOption(dirOption);
    }

    /**
     * Processes the command line, then loads every experiment listed in the list file. Each line is handled
     * independently: a failure is logged and added to the error objects, a success is added to the success objects,
     * and a summary is produced once all lines have been read.
     * 
     * @param args command line arguments
     * @return the exception that stopped processing (command line problem or I/O error on the list file), or null
     * 
     * @see ubic.gemma.util.AbstractCLI#doWork(java.lang.String[])
     */
    @Override
    protected Exception doWork(String[] args) {
        Exception err = processCommandLine("Expression Data loader", args);
        if (err != null) {
            return err;
        }
        try {
            this.eeLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
            this.eeService = this.getBean(ExpressionExperimentService.class);
            this.adService = this.getBean(ArrayDesignService.class);
            this.taxonService = this.getBean(TaxonService.class);
            if (this.fileName != null) {
                log.info("Loading experiments from " + this.fileName);
                BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(
                        this.dirName, this.fileName))));
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // Skip blank lines and comments in the list file.
                        if (StringUtils.isBlank(line) || line.startsWith("#")) {
                            continue;
                        }

                        String expName = line.split(SPLITCHAR)[0];

                        try {
                            this.loadExperiment(line);
                            log.info("Successfully Loaded " + expName);
                            successObjects.add(expName);
                        } catch (Exception e) {
                            errorObjects.add(expName + ": " + e.getMessage());
                            log.error("Failure loading " + expName, e);
                        }
                    }
                } finally {
                    // Always release the list file, even if reading it failed part way through.
                    reader.close();
                }
                summarizeProcessing();
            }
        } catch (IOException e) {
            return e;
        }
        return null;
    }

    /**
     * Reads the list file name ('f') and the data directory ('d') from the parsed command line.
     */
    @Override
    protected void processOptions() {
        super.processOptions();
        if (hasOption('f')) {
            fileName = getOptionValue('f');
        }

        if (hasOption('d')) {
            dirName = getOptionValue('d');
        }
    }

    /**
     * Ensures the configuration line names the new array design.
     * 
     * @param fields the fields of one configuration line
     * @throws IllegalArgumentException if the array design name field is blank
     */
    private void checkForArrayDesignName(String[] fields) {
        if (StringUtils.isBlank(fields[ARRAYDESIGNNAMEI])) {
            throw new IllegalArgumentException("Array design must be given if array design is new.");
        }
    }

    /**
     * Determines the array designs of the experiment and stores them in the metadata.
     * <ul>
     * <li>Blank short name field: a new array design is created from the array design name field.</li>
     * <li>Short name field equal to <code>IMAGE</code>: as above, and the metadata is flagged as having IMAGE clones
     * as probe ids.</li>
     * <li>Otherwise: the field is split on <code>+</code> and each entry is looked up by short name, falling back to
     * a unique alternate name; if the array design name field is also filled in, an additional new array design is
     * created.</li>
     * </ul>
     * New array designs created in the first two cases get their primary taxon from
     * <code>metaData.getTaxon()</code>.
     * 
     * @param fields the trimmed fields of one configuration line
     * @param metaData metadata being assembled; receives the array designs
     * @throws IllegalArgumentException if a new array design is needed but no name was given
     * @throws IllegalStateException if a referenced array design is unknown or its alternate name is ambiguous
     */
    private void configureArrayDesigns(String[] fields, SimpleExpressionExperimentMetaData metaData) {
        TechnologyType techType = TechnologyType.fromString(fields[TECHNOLOGYTYPEI]);
        String shortNames = fields[AD_SHORT_NAME_I];
        Collection<ArrayDesign> designs = new HashSet<ArrayDesign>();

        if (StringUtils.isBlank(shortNames)) {
            // that's okay, so long as we get an array design name
            ArrayDesign created = getNewArrayDesignFromName(fields);
            created.setTechnologyType(techType);
            created.setPrimaryTaxon(metaData.getTaxon());
            designs.add(created);
        } else if (shortNames.trim().equals("IMAGE")) {
            ArrayDesign created = getNewArrayDesignFromName(fields);
            created.setTechnologyType(techType);
            created.setPrimaryTaxon(metaData.getTaxon());
            designs.add(created);
            metaData.setProbeIdsAreImageClones(true);
        } else {
            // allow for the case where there is an additional new array design to be added.
            if (StringUtils.isNotBlank(fields[ARRAYDESIGNNAMEI])) {
                ArrayDesign created = getNewArrayDesignFromName(fields);
                created.setTechnologyType(techType);
                designs.add(created);
            }

            for (String shortName : shortNames.split("\\+")) {
                ArrayDesign found = adService.findByShortName(shortName);

                if (found == null) {
                    // Not a short name; accept it as an alternate name only if it is unambiguous.
                    Collection<ArrayDesign> byAlternateName = adService.findByAlternateName(shortName);
                    if (byAlternateName.size() == 1) {
                        found = byAlternateName.iterator().next();
                    } else if (byAlternateName.size() > 1) {
                        throw new IllegalStateException("Array Design " + shortName
                                + " is ambiguous, it is an alternate name of more than one array design");
                    }
                }

                if (found == null) {
                    throw new IllegalStateException("Array Design " + shortName
                            + " is not loaded into the system yet; load it and try again.");
                }
                designs.add(found);
            }
        }
        metaData.setArrayDesigns(designs);
    }

    /**
     * Copies the quantitation type name, description, type and scale from the configuration line into the metadata.
     * The general type is always set to {@link GeneralType#QUANTITATIVE}.
     * 
     * @param fields the trimmed fields of one configuration line
     * @param metaData metadata being assembled
     */
    private void configureQuantitationType(String[] fields, SimpleExpressionExperimentMetaData metaData) {
        metaData.setQuantitationTypeName(fields[QNAMEI]);
        metaData.setQuantitationTypeDescription(fields[QDESCRIPTIONI]);
        metaData.setGeneralType(GeneralType.QUANTITATIVE);
        metaData.setType(StandardQuantitationType.fromString(fields[QTYPEI]));
        metaData.setScale(ScaleType.fromString(fields[QSCALEI]));
    }

    /**
     * Looks up the taxon by the scientific name given in the configuration line and stores the taxon returned by
     * the taxon service in the metadata.
     * 
     * @param fields the trimmed fields of one configuration line
     * @param metaData metadata being assembled; receives the taxon
     * @throws IllegalArgumentException if the taxon service does not find a matching taxon
     */
    private void configureTaxon(String[] fields, SimpleExpressionExperimentMetaData metaData) {
        Taxon template = Taxon.Factory.newInstance();
        template.setScientificName(fields[SPECIESI]);
        Taxon existing = taxonService.find(template);
        if (existing == null) {
            throw new IllegalArgumentException("There is no taxon with scientific name " + fields[SPECIESI]
                    + " in the system; please add it first before loading data.");
        }
        // Use the taxon that was found, not the bare template that only carries the scientific name.
        metaData.setTaxon(existing);
    }

    /**
     * Creates a new array design whose name and short name are both the array design name field.
     * 
     * @param fields the trimmed fields of one configuration line
     * @return the new array design
     * @throws IllegalArgumentException if the array design name field is blank
     */
    private ArrayDesign getNewArrayDesignFromName(String[] fields) {
        checkForArrayDesignName(fields);
        ArrayDesign ad = ArrayDesign.Factory.newInstance();
        ad.setName(fields[ARRAYDESIGNNAMEI]);
        ad.setShortName(ad.getName());
        return ad;
    }

    /**
     * Loads one experiment from a single configuration line.
     * 
     * @param configurationLine one line of the list file, see the class documentation for the field layout
     * @throws IllegalArgumentException if the line does not have {@link #TOTALFIELDS} fields, or an experiment with
     *         the same short name already exists
     * @throws Exception on any other failure while configuring or loading the experiment
     */
    private void loadExperiment(String configurationLine) throws Exception {
        String fields[] = configurationLine.split(SPLITCHAR);
        if (fields.length != TOTALFIELDS) {
            throw new IllegalArgumentException("Field Missing Got[" + fields.length + "]: " + configurationLine);
        }
        for (int i = 0; i < fields.length; i++) {
            fields[i] = StringUtils.trim(fields[i]);
        }

        String shortName = fields[SHORTNAMEI];
        if (eeService.findByShortName(shortName) != null) {
            throw new IllegalArgumentException("There is already an experiment with short name " + shortName
                    + "; please choose something unique.");
        }

        SimpleExpressionExperimentMetaData metaData = new SimpleExpressionExperimentMetaData();
        metaData.setName(fields[NAMEI]);
        metaData.setShortName(shortName);
        metaData.setDescription(fields[DESCRIPTIONI]);

        // The taxon must be set first: configureArrayDesigns reads metaData.getTaxon() for new array designs.
        configureTaxon(fields, metaData);
        configureArrayDesigns(fields, metaData);

        InputStream data = new FileInputStream(new File(this.dirName, fields[DATAFILEI]));
        try {
            metaData.setSourceUrl(fields[SOURCEI]);

            String pubMedId = fields[PUBMEDI];
            if (StringUtils.isNotBlank(pubMedId)) {
                metaData.setPubMedId(Integer.parseInt(pubMedId));
            }

            configureQuantitationType(fields, metaData);

            ExpressionExperiment ee = eeLoaderService.create(metaData, data);
            eeService.thawLite(ee);
        } finally {
            // Release the data file whether or not loading succeeded. A failure to close is only logged so it
            // neither hides the original error nor turns a successful load into a reported failure.
            try {
                data.close();
            } catch (IOException e) {
                log.warn("Could not close data file " + fields[DATAFILEI], e);
            }
        }
    }

}