chibi.gemmaanalysis.cli.deprecated.StringVectorCleanup.java Source code

Java tutorial

Introduction

Here is the source code for chibi.gemmaanalysis.cli.deprecated.StringVectorCleanup.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2007 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package chibi.gemmaanalysis.cli.deprecated;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.lang3.StringUtils;

import ubic.basecode.io.ByteArrayConverter;
import ubic.gemma.apps.ExpressionExperimentManipulatingCLI;
import ubic.gemma.apps.GemmaCLI.CommandGroup;
import ubic.gemma.datastructure.matrix.VectorMarshall;
import ubic.gemma.model.common.quantitationtype.PrimitiveType;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.common.quantitationtype.QuantitationTypeService;
import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector;
import ubic.gemma.model.expression.bioAssayData.DesignElementDataVectorService;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

/**
 * Remove tabs from strings stored in the database. Can also check all vectors for correct sizes (a useful database
 * check, but slow). This is more or less a one-off, it was used to clean up errors that shouldn't happen any more (!)
 * 
 * @author pavlidis
 * @version $Id$
 */
@Deprecated
public class StringVectorCleanup extends ExpressionExperimentManipulatingCLI {

    @SuppressWarnings("static-access")
    @Override
    protected void buildOptions() {
        super.buildOptions();
        this.addOption(OptionBuilder
                .withDescription("Examine ALL vectors for correct sizes, "
                        + "not just string types. Slow but useful check of the integrity of the system")
                .create('f'));
    }

    @Override
    public CommandGroup getCommandGroup() {
        return CommandGroup.DEPRECATED;
    }

    @Override
    protected void processOptions() {
        super.processOptions();
        if (this.hasOption('f')) {
            this.fullCheck = true;
            log.info("A full check of all vectors will be done");
        }
    }

    DesignElementDataVectorService dedvs;
    QuantitationTypeService qts;
    private boolean fullCheck = false;

    @Override
    protected Exception doWork(String[] args) {
        Exception e = processCommandLine(args);
        if (e != null)
            return e;

        qts = this.getBean(QuantitationTypeService.class);

        dedvs = this.getBean(DesignElementDataVectorService.class);

        for (BioAssaySet ee : expressionExperiments) {
            processExperiment(ee);
        }

        summarizeProcessing();
        return null;

    }

    /**
     * @param ee
     */
    @SuppressWarnings("unchecked")
    private void processExperiment(BioAssaySet bas) {
        ExpressionExperiment ee = (ExpressionExperiment) bas;
        Collection<QuantitationType> types = this.eeService.getQuantitationTypes(ee);

        ByteArrayConverter converter = new ByteArrayConverter();

        qtype: for (QuantitationType type : types) {
            boolean isStringType = type.getRepresentation().equals(PrimitiveType.STRING);
            if (!isStringType && !fullCheck)
                continue;

            log.info("Processing " + type);
            Collection<? extends DesignElementDataVector> vecs = dedvs.find(type);
            dedvs.thaw(vecs);

            boolean changed = false;
            int count = 0;
            for (DesignElementDataVector vector : vecs) {

                if (isStringType) {
                    byte[] dat = vector.getData();

                    int numBioAssays = vector.getBioAssayDimension().getBioAssays().size();
                    String[] rawStrings = converter.byteArrayToStrings(dat);
                    List<String> updated = new ArrayList<String>();
                    for (String string : rawStrings) {
                        if (string.equals("\t")) {
                            changed = true;
                        } else {
                            updated.add(string);
                        }
                    }

                    if (updated.size() != numBioAssays) {
                        dedvs.thaw((Collection<? extends DesignElementDataVector>) vector);
                        log.error("Vector " + vector.getId()
                                + " did not have right number of values after 'tab' removal for " + type
                                + "; expected " + numBioAssays + " got " + updated.size() + "; "
                                + vector.getExpressionExperiment());
                        continue qtype;
                    }

                    if (changed) {
                        byte[] newDat = converter.toBytes(updated.toArray(new String[] {}));
                        vector.setData(newDat);
                    }

                } else if (fullCheck) {
                    List<Object> vec = VectorMarshall.marshall(vector);
                    int numBioAssays = vector.getBioAssayDimension().getBioAssays().size();
                    if (vec.size() != numBioAssays) {
                        dedvs.thaw((Collection<? extends DesignElementDataVector>) vector);
                        eeService.thawLite(vector.getExpressionExperiment());
                        log.error("Vector " + vector.getId() + " did not have right number of values  " + type
                                + "; expected " + numBioAssays + " got " + vec.size() + "; "
                                + vector.getExpressionExperiment());
                        log.error("Values:\n" + StringUtils.join(vec, ","));
                        continue qtype;
                    }
                }
                if (++count % 10000 == 0) {
                    log.info("Processed " + count + " vectors for " + type);
                }
            }

            if (changed) {
                log.info("Updating " + vecs.size() + " vectors that may have contained 'tab'.");
                dedvs.update(vecs);
            }

        }
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        StringVectorCleanup c = new StringVectorCleanup();
        Exception e = c.doWork(args);
        if (e != null) {
            log.fatal(e, e);
        }

    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.util.AbstractCLI#getCommandName()
     */
    @Override
    public String getCommandName() {
        return null;
    }
}