gov.nih.nci.caarray.application.translation.geosoft.GeoSoftExporterBean.java Source code

Java tutorial

Introduction

Here is the source code for gov.nih.nci.caarray.application.translation.geosoft.GeoSoftExporterBean.java

Source

//======================================================================================
// Copyright 5AM Solutions Inc, Yale University
//
// Distributed under the OSI-approved BSD 3-Clause License.
// See http://ncip.github.com/caarray/LICENSE.txt for details.
//======================================================================================
package gov.nih.nci.caarray.application.translation.geosoft;

import gov.nih.nci.caarray.application.ExceptionLoggingInterceptor;
import gov.nih.nci.caarray.application.fileaccess.FileAccessUtils;
import gov.nih.nci.caarray.domain.array.ArrayDesign;
import gov.nih.nci.caarray.domain.data.AbstractArrayData;
import gov.nih.nci.caarray.domain.data.DerivedArrayData;
import gov.nih.nci.caarray.domain.data.RawArrayData;
import gov.nih.nci.caarray.domain.file.CaArrayFile;
import gov.nih.nci.caarray.domain.hybridization.Hybridization;
import gov.nih.nci.caarray.domain.project.Experiment;
import gov.nih.nci.caarray.domain.project.Project;
import gov.nih.nci.caarray.domain.protocol.ProtocolApplication;
import gov.nih.nci.caarray.domain.sample.AbstractBioMaterial;
import gov.nih.nci.caarray.domain.sample.Extract;
import gov.nih.nci.caarray.domain.sample.LabeledExtract;
import gov.nih.nci.caarray.domain.sample.Sample;
import gov.nih.nci.caarray.domain.sample.Source;
import gov.nih.nci.caarray.injection.InjectionInterceptor;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.ejb.Local;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.interceptor.Interceptors;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.io.output.CloseShieldOutputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.google.inject.Inject;

/**
 * Export valid Experiments to a GEO SOFT format. Note: This class doesn't need to be an EJB.
 *
 * @author gax
 * @since 2.3.1
 */
@Local(GeoSoftExporter.class)
@Stateless
@Interceptors({ ExceptionLoggingInterceptor.class, InjectionInterceptor.class })
public class GeoSoftExporterBean implements GeoSoftExporter {
    /** Logger for this class. */
    private static final Logger LOG = Logger.getLogger(GeoSoftExporterBean.class);
    /** Provider name that every array design of an exportable experiment is compared against. */
    private static final String AFFYMETRIX_PROVIDER = "Affymetrix";
    /** File type name of the derived data file that every hybridization must have. */
    static final String AFFYMETRIX_CHP_TYPE_NAME = "AFFYMETRIX_CHP";

    // CHECKSTYLE:OFF magic numbers
    /** Per-hybridization allowance added to the package size estimate (SOFT text and archive entry overhead). */
    private static final int ONE_KB = 1024;

    // CHECKSTYLE:ON

    /** Helper used to create archive entries and to add files to an archive; set by the injected setter. */
    private FileAccessUtils fileAccessHelper;

    /**
     * {@inheritDoc}
     * <p>
     * The array designs are checked first; when that check asks to stop, only the errors gathered so far are
     * returned. Otherwise each hybridization is checked in turn, and the protocol related errors (collected
     * in a set, so each message appears once) are appended after all other errors.
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public List<String> validateForExport(Experiment experiment) {
        final List<String> problems = new ArrayList<String>();
        if (checkArrayDesigns(problems, experiment)) {
            return problems;
        }

        // Protocol messages do not name a hybridization, so a set keeps them from being repeated.
        final Set<String> protocolProblems = new HashSet<String>();
        for (final Hybridization hybridization : experiment.getHybridizations()) {
            // Biomaterials of the chain(s) leading to this hybridization, grouped by kind.
            final Set<Source> chainSources = new HashSet<Source>();
            final Set<Sample> chainSamples = new HashSet<Sample>();
            final Set<Extract> chainExtracts = new HashSet<Extract>();
            final Set<LabeledExtract> chainLabeledExtracts = new HashSet<LabeledExtract>();
            GeoSoftFileWriterUtil.collectBioMaterials(hybridization, chainSources, chainSamples, chainExtracts,
                    chainLabeledExtracts);

            // Data file requirements.
            checkRawData(problems, hybridization);
            checkDerivedDataFileType(problems, hybridization);

            // Protocol requirements along the chain.
            checkBioProtocol(protocolProblems, chainSamples, "nucleic_acid_extraction");
            checkBioProtocol(protocolProblems, chainExtracts, "labeling");
            checkBioProtocol(protocolProblems, chainLabeledExtracts, "hybridization");
            checkProtocol(protocolProblems, hybridization.getProtocolApplications(), "scan", "image_acquisition");
            checkDataProcessingProtocol(protocolProblems, hybridization.getRawDataCollection());

            // Annotation requirements.
            checkCharOrFactorValue(problems, hybridization, chainLabeledExtracts, chainExtracts, chainSamples,
                    chainSources);
            checkLabeledExtract(problems, chainLabeledExtracts, chainExtracts);
        }

        problems.addAll(protocolProblems);
        return problems;
    }

    /**
     * Checks that the experiment's array designs are acceptable for a GEO export.
     * <p>
     * Every design must be provided by Affymetrix and must have a GEO accession. A missing GEO accession is
     * recorded as an error without stopping validation; a missing or non-Affymetrix design stops it.
     *
     * @param errors list that receives a message for every problem found.
     * @param experiment the experiment whose array designs are checked.
     * @return true if the experiment has no array design, or one of its designs is not provided by Affymetrix
     *         (no need to do more validations); false otherwise.
     */
    private boolean checkArrayDesigns(List<String> errors, Experiment experiment) {
        if (experiment.getArrayDesigns().isEmpty()) {
            errors.add("No (" + AFFYMETRIX_PROVIDER + ") array design specified");
            return true;
        }
        for (final ArrayDesign ad : experiment.getArrayDesigns()) {
            // * The array provider should be Affymetrix. A design without any provider is treated the same
            // way as a design from another provider instead of failing with a NullPointerException.
            final String providerName = ad.getProvider() == null ? null : ad.getProvider().getName();
            if (!AFFYMETRIX_PROVIDER.equals(providerName)) {
                errors.add(AFFYMETRIX_PROVIDER + " is not the provider for array design " + ad.getName());
                // Stop on the provider mismatch itself. Testing "errors is not empty" here would also stop
                // on a GEO accession error left by a previously visited design, which made the outcome
                // depend on the iteration order of the designs.
                return true;
            }
            // * All array designs associated with the experiment must be ones for which the System has the GEO
            // accession.
            if (StringUtils.isBlank(ad.getGeoAccession())) {
                errors.add("Array design " + ad.getName() + " has no GEO accession");
            }
        }
        return false;
    }

    /**
     * Reports an error when neither the hybridization nor any of its biomaterials is annotated.
     * <p>
     * The requirement is met when the hybridization has a factor value, or when any of the given labeled
     * extracts, extracts, samples or sources has characteristics.
     *
     * @param errors list that receives the error message.
     * @param hyb the hybridization being validated.
     * @param labeledExtracts labeled extracts associated with the hybridization.
     * @param extracts extracts associated with the hybridization.
     * @param samples samples associated with the hybridization.
     * @param sources sources associated with the hybridization.
     */
    @SuppressWarnings("PMD.ExcessiveParameterList")
    private void checkCharOrFactorValue(List<String> errors, Hybridization hyb, Set<LabeledExtract> labeledExtracts,
            Set<Extract> extracts, Set<Sample> samples, Set<Source> sources) {
        // * There must be at least 1 characteristic or factor value that is present in every
        // biomaterial-hybridization chain.
        if (!hyb.getFactorValues().isEmpty()) {
            return;
        }
        if (hasCharacteristics(labeledExtracts) || hasCharacteristics(extracts) || hasCharacteristics(samples)
                || hasCharacteristics(sources)) {
            return;
        }
        errors.add("Hybridization " + hyb.getName()
                + " and associated biomaterials must have at least one characteristic or factor value");
    }

    /**
     * Tells whether at least one of the given biomaterials carries descriptive information.
     * <p>
     * A biomaterial counts when it has a non-empty characteristics collection, a tissue site, a disease
     * state, a cell type, or a non-blank external id.
     *
     * @param bios the biomaterials to inspect.
     * @return true as soon as one biomaterial qualifies; false if none does (including an empty set).
     */
    @SuppressWarnings("empty-statement")
    private static boolean hasCharacteristics(Set<? extends AbstractBioMaterial> bios) {
        for (final AbstractBioMaterial material : bios) {
            final boolean described = !material.getCharacteristics().isEmpty()
                    || material.getTissueSite() != null
                    || material.getDiseaseState() != null
                    || material.getCellType() != null
                    || StringUtils.isNotBlank(material.getExternalId());
            if (described) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reports an error when the hybridization has no derived data file of type
     * {@link #AFFYMETRIX_CHP_TYPE_NAME}.
     *
     * @param errors list that receives the error message.
     * @param hyb the hybridization being validated.
     */
    private void checkDerivedDataFileType(List<String> errors, Hybridization hyb) {
        // * Every hybridization must have a derived data file of type AFFYMETRIX_CHP.
        boolean chpFound = false;
        for (final DerivedArrayData derived : hyb.getDerivedDataCollection()) {
            final String typeName = derived.getDataFile().getFileType().getName();
            if (AFFYMETRIX_CHP_TYPE_NAME.equals(typeName)) {
                chpFound = true;
                break;
            }
        }
        if (!chpFound) {
            errors.add("Hybridization " + hyb.getName() + " must have a derived data file of type "
                    + AFFYMETRIX_CHP_TYPE_NAME);
        }
    }

    /**
     * Checks labels and material types of the extracts associated with a hybridization.
     * <p>
     * An error is added for every labeled extract without a label, and one more error is added when no
     * labeled extract and no extract has a material type.
     *
     * @param errors list that receives the error messages.
     * @param labeledExtracts labeled extracts to check for a label and a material type.
     * @param extracts extracts to check for a material type.
     */
    private void checkLabeledExtract(List<String> errors, Set<LabeledExtract> labeledExtracts,
            Set<Extract> extracts) {
        // * For every chain, the Material Type of the extract or labeled extract must be present.
        boolean materialTypeSeen = false;

        // Every labeled extract is visited (no early exit) because each one needs its own label check.
        for (final LabeledExtract labeled : labeledExtracts) {
            materialTypeSeen |= labeled.getMaterialType() != null;
            if (labeled.getLabel() == null) {
                errors.add("Labeled Extract " + labeled.getName() + " must have a label");
            }
        }

        for (final Extract extract : extracts) {
            if (extract.getMaterialType() == null) {
                continue;
            }
            materialTypeSeen = true;
            break;
        }

        if (materialTypeSeen) {
            return;
        }
        errors.add("Material Type not set on Labeled Extract or Extract");
    }

    /**
     * Reports an error when the hybridization's raw data collection is empty.
     *
     * @param errors list that receives the error message.
     * @param hyb the hybridization being validated.
     */
    private void checkRawData(List<String> errors, Hybridization hyb) {
        // * Every hybridization must have at least one raw data file.
        final boolean hasRawData = !hyb.getRawDataCollection().isEmpty();
        if (hasRawData) {
            return;
        }
        errors.add("Hybridization " + hyb.getName() + " must have at least one Raw Data File");
    }

    /**
     * Reports an error unless at least one protocol application uses a protocol whose type value matches
     * (ignoring case) one of the accepted names.
     *
     * @param errors set that receives the error message.
     * @param protocolApps the protocol applications to search.
     * @param protocols the accepted protocol type names.
     */
    private void checkProtocol(Set<String> errors, List<ProtocolApplication> protocolApps, String... protocols) {
        for (final ProtocolApplication application : protocolApps) {
            final String appliedType = application.getProtocol().getType().getValue();
            for (int i = 0; i < protocols.length; i++) {
                if (appliedType.equalsIgnoreCase(protocols[i])) {
                    // One matching application is enough; there is nothing to report.
                    return;
                }
            }
        }
        errors.add("Missing protocol (one of " + Arrays.asList(protocols) + " needed)");
    }

    /**
     * Runs the protocol check over the combined protocol applications of the given biomaterials.
     *
     * @param errors set that receives the error message.
     * @param bios the biomaterials whose protocol applications are pooled.
     * @param protocols the accepted protocol type names.
     */
    private void checkBioProtocol(Set<String> errors, Set<? extends AbstractBioMaterial> bios,
            String... protocols) {
        // Pool the applications first: a match on any one of the biomaterials satisfies the requirement.
        final List<ProtocolApplication> pooled = new ArrayList<ProtocolApplication>();
        for (final AbstractBioMaterial material : bios) {
            pooled.addAll(material.getProtocolApplications());
        }
        checkProtocol(errors, pooled, protocols);
    }

    /**
     * Reports an error when a raw data item has no protocol application.
     *
     * @param errors set that receives the error message.
     * @param rawDataCollection the raw data items to check.
     */
    private void checkDataProcessingProtocol(Set<String> errors, Set<RawArrayData> rawDataCollection) {
        for (final RawArrayData rawData : rawDataCollection) {
            if (!rawData.getProtocolApplications().isEmpty()) {
                continue;
            }
            // The message is constant, so the receiving set holds it at most once.
            errors.add("Missing data processing protocol");
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * The TGZ packaging is always offered; the ZIP packaging is offered in addition only when the estimated
     * package size is below {@code PackagingInfo.MAX_ZIP_SIZE}.
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public List<PackagingInfo> getAvailablePackagingInfos(Project project) {
        final List<PackagingInfo> available = new ArrayList<PackagingInfo>();
        final Experiment experiment = project.getExperiment();

        final PackagingInfo.PackagingMethod tgz = PackagingInfo.PackagingMethod.TGZ;
        available.add(new PackagingInfo(experiment.getPublicIdentifier() + tgz.getExtension(), tgz));

        if (getEstimatedPackageSize(experiment) < PackagingInfo.MAX_ZIP_SIZE) {
            final PackagingInfo.PackagingMethod zip = PackagingInfo.PackagingMethod.ZIP;
            available.add(new PackagingInfo(experiment.getPublicIdentifier() + zip.getExtension(), zip));
        }
        return available;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The experiment is validated first; an {@link IllegalArgumentException} is thrown when validation
     * reports any error, and nothing is written in that case.
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public void writeGeoSoftFile(Project project, String permaLinkUrl, PrintWriter out) throws IOException {
        final Experiment experiment = project.getExperiment();
        final List<String> problems = validateForExport(experiment);
        if (!problems.isEmpty()) {
            throw new IllegalArgumentException("experiment not valid for export");
        }
        GeoSoftFileWriterUtil.writeSoftFile(experiment, permaLinkUrl, out);
    }

    /**
     * {@inheritDoc}
     * <p>
     * A README is added only to TGZ packages. For any other method the project must qualify for ZIP
     * packaging, otherwise an {@link IllegalArgumentException} is thrown before anything is written.
     * The caller's stream is never closed by this method.
     *
     * @throws IOException if writing the archive fails, including a failure while finishing the archive.
     */
    @Override
    @TransactionAttribute(TransactionAttributeType.SUPPORTS)
    public void export(Project project, String permaLinkUrl, PackagingInfo.PackagingMethod method, OutputStream out)
            throws IOException {

        final OutputStream closeShield = new CloseShieldOutputStream(out);
        final Experiment experiment = project.getExperiment();
        final boolean addReadMe = method == PackagingInfo.PackagingMethod.TGZ;
        if (!addReadMe) {
            ensureZippable(project);
        }

        final ArchiveOutputStream arOut = method.createArchiveOutputStream(closeShield);
        boolean exported = false;
        try {
            exportArchive(experiment, permaLinkUrl, addReadMe, arOut);
            exported = true;
        } finally {
            // note that the caller's stream is shielded from the close(),
            // but this is the only way to finish and flush the (gzip) stream.
            if (exported) {
                // Closing writes the end of the archive; a failure here means the package is incomplete,
                // so it must reach the caller instead of being swallowed.
                arOut.close();
            } else {
                // An exception is already on its way out; do not let a close failure replace it.
                IOUtils.closeQuietly(arOut);
            }
        }
    }

    /**
     * Verifies that ZIP is among the packaging methods available for the project.
     *
     * @param project the project about to be exported as a ZIP package.
     * @throws IllegalArgumentException if ZIP packaging is not available for the project.
     */
    private void ensureZippable(Project project) {
        boolean zipOffered = false;
        for (final PackagingInfo info : getAvailablePackagingInfos(project)) {
            if (info.getMethod() == PackagingInfo.PackagingMethod.ZIP) {
                zipOffered = true;
                break;
            }
        }
        if (!zipOffered) {
            throw new IllegalArgumentException("experiment files are too large for a standard ZIP package");
        }
    }

    /**
     * Writes the package content to the archive: the SOFT file, then the data files, then optionally the
     * README.
     *
     * @param experiment the experiment to export; it must pass validation.
     * @param permaLinkUrl the permalink URL handed to the SOFT file writer.
     * @param addReadme whether the README entry is added after the data files.
     * @param ar the archive to write to.
     * @throws IOException if writing to the archive fails.
     * @throws IllegalArgumentException if the experiment does not pass validation.
     */
    @SuppressWarnings({ "PMD.AvoidInstanceofChecksInCatchClause", "PMD.AvoidThrowingRawExceptionTypes" })
    private void exportArchive(Experiment experiment, String permaLinkUrl, boolean addReadme,
            ArchiveOutputStream ar) throws IOException {
        final List<String> problems = validateForExport(experiment);
        if (!problems.isEmpty()) {
            throw new IllegalArgumentException("experiment not valid for export");
        }

        generateSoftFile(experiment, permaLinkUrl, ar);
        addDataFiles(experiment, ar);
        if (!addReadme) {
            return;
        }
        addReadmeFile(ar);
    }

    /**
     * Renders the SOFT file in memory and stores it in the archive as
     * {@code <public identifier>.soft.txt}.
     *
     * @param experiment the experiment to describe.
     * @param permaLinkUrl the permalink URL handed to the SOFT file writer.
     * @param zout the archive that receives the entry.
     * @throws IOException if writing to the archive fails.
     */
    private void generateSoftFile(Experiment experiment, String permaLinkUrl, ArchiveOutputStream zout)
            throws IOException {
        // The text is buffered first because the entry is created with its size.
        final ByteArrayOutputStream softBytes = new ByteArrayOutputStream();
        final Writer encoder = new OutputStreamWriter(softBytes, "UTF-8");
        final PrintWriter softWriter = new PrintWriter(encoder);
        GeoSoftFileWriterUtil.writeSoftFile(experiment, permaLinkUrl, softWriter);
        softWriter.close();

        final String entryName = experiment.getPublicIdentifier() + ".soft.txt";
        final ArchiveEntry entry = this.fileAccessHelper.createArchiveEntry(zout, entryName, softBytes.size());
        zout.putArchiveEntry(entry);
        softBytes.writeTo(zout);
        zout.closeArchiveEntry();
    }

    /**
     * Estimates the size of the export package.
     * <p>
     * The estimate is the sum of the compressed sizes of all raw and derived data files of every
     * hybridization and of the project's user visible supplemental files, plus {@link #ONE_KB} per
     * hybridization.
     *
     * @param experiment the experiment to be packaged.
     * @return the estimated package size.
     */
    private long getEstimatedPackageSize(Experiment experiment) {
        long size = 0;
        for (final Hybridization h : experiment.getHybridizations()) {
            for (final RawArrayData rad : h.getRawDataCollection()) {
                size += rad.getDataFile().getCompressedSize();
            }
            for (final DerivedArrayData dad : h.getDerivedDataCollection()) {
                size += dad.getDataFile().getCompressedSize();
            }
        }
        for (final CaArrayFile f : experiment.getProject().getUserVisibleSupplementalFiles()) {
            size += f.getCompressedSize();
        }

        // estimate GEO SOFT file size and zip entry overhead.
        // Widen before multiplying so the product is computed in long arithmetic rather than overflowing int.
        size += (long) experiment.getHybridizations().size() * ONE_KB;
        return size;
    }

    /**
     * Adds the experiment's files to the archive: for each hybridization its raw data files followed by its
     * derived data files, and finally the project's user visible supplemental files.
     *
     * @param experiment the experiment whose files are added.
     * @param zout the archive to add the files to.
     * @throws IOException if writing to the archive fails.
     */
    private void addDataFiles(Experiment experiment, ArchiveOutputStream zout) throws IOException {
        for (final Hybridization hybridization : experiment.getHybridizations()) {
            final Set<RawArrayData> rawData = hybridization.getRawDataCollection();
            addDataFiles(rawData, zout);
            final Set<DerivedArrayData> derivedData = hybridization.getDerivedDataCollection();
            addDataFiles(derivedData, zout);
        }

        final Project owner = experiment.getProject();
        for (final CaArrayFile supplemental : owner.getUserVisibleSupplementalFiles()) {
            this.fileAccessHelper.addFileToArchive(supplemental, zout);
        }
    }

    /**
     * Adds the data file of every item in the collection to the archive.
     *
     * @param dataCollection the array data items whose data files are added.
     * @param zout the archive to add the files to.
     * @throws IOException if writing to the archive fails.
     */
    private void addDataFiles(Set<? extends AbstractArrayData> dataCollection, ArchiveOutputStream zout)
            throws IOException {
        for (final AbstractArrayData arrayData : dataCollection) {
            this.fileAccessHelper.addFileToArchive(arrayData.getDataFile(), zout);
        }
    }

    /**
     * Adds the {@code README.txt} class path resource, located relative to this class, to the archive.
     *
     * @param ar the archive that receives the README entry.
     * @throws IOException if the resource is missing, cannot be read, or the archive cannot be written.
     */
    private void addReadmeFile(ArchiveOutputStream ar) throws IOException {
        final InputStream is = GeoSoftExporterBean.class.getResourceAsStream("README.txt");
        if (is == null) {
            // getResourceAsStream signals a missing resource with null; name the resource in the failure
            // rather than letting the copy below fail with a bare NullPointerException.
            throw new IOException("README.txt resource not found for " + GeoSoftExporterBean.class.getName());
        }

        // The content is buffered first because the entry is created with its size.
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try {
            IOUtils.copy(is, baos);
        } finally {
            // Release the resource stream even when the copy fails.
            IOUtils.closeQuietly(is);
        }

        final ArchiveEntry ae = this.fileAccessHelper.createArchiveEntry(ar, "README.txt", baos.size());
        ar.putArchiveEntry(ae);
        baos.writeTo(ar);
        ar.closeArchiveEntry();
    }

    /**
     * Injection point for the helper this bean uses to create archive entries and to add files to archives.
     *
     * @param fileAccessHelper the helper instance to use from now on.
     */
    @Inject
    public void setFileAccessHelper(FileAccessUtils fileAccessHelper) {
        this.fileAccessHelper = fileAccessHelper;
    }
}