gov.nih.nci.caarray.services.external.v1_0.data.impl.DataServiceBean.java Source code

Java tutorial

Introduction

Here is the source code for gov.nih.nci.caarray.services.external.v1_0.data.impl.DataServiceBean.java

Source

//======================================================================================
// Copyright 5AM Solutions Inc, Yale University
//
// Distributed under the OSI-approved BSD 3-Clause License.
// See http://ncip.github.com/caarray/LICENSE.txt for details.
//======================================================================================
package gov.nih.nci.caarray.services.external.v1_0.data.impl;

import gov.nih.nci.caarray.application.ConfigurationHelper;
import gov.nih.nci.caarray.application.ServiceLocatorFactory;
import gov.nih.nci.caarray.application.fileaccess.TemporaryFileCache;
import gov.nih.nci.caarray.dao.ArrayDao;
import gov.nih.nci.caarray.dataStorage.DataStorageFacade;
import gov.nih.nci.caarray.dataStorage.ParsedDataPersister;
import gov.nih.nci.caarray.domain.ConfigParamEnum;
import gov.nih.nci.caarray.domain.data.AbstractArrayData;
import gov.nih.nci.caarray.domain.data.AbstractDataColumn;
import gov.nih.nci.caarray.domain.data.DerivedArrayData;
import gov.nih.nci.caarray.domain.data.DesignElementList;
import gov.nih.nci.caarray.domain.data.HybridizationData;
import gov.nih.nci.caarray.domain.data.RawArrayData;
import gov.nih.nci.caarray.domain.file.CaArrayFile;
import gov.nih.nci.caarray.domain.file.FileType;
import gov.nih.nci.caarray.domain.file.FileTypeRegistry;
import gov.nih.nci.caarray.domain.hybridization.Hybridization;
import gov.nih.nci.caarray.domain.project.Experiment;
import gov.nih.nci.caarray.domain.search.AdHocSortCriterion;
import gov.nih.nci.caarray.external.v1_0.CaArrayEntityReference;
import gov.nih.nci.caarray.external.v1_0.data.DataSet;
import gov.nih.nci.caarray.external.v1_0.data.DesignElement;
import gov.nih.nci.caarray.external.v1_0.data.File;
import gov.nih.nci.caarray.external.v1_0.data.FileContents;
import gov.nih.nci.caarray.external.v1_0.data.FileMetadata;
import gov.nih.nci.caarray.external.v1_0.data.FileStreamableContents;
import gov.nih.nci.caarray.external.v1_0.data.MageTabFileSet;
import gov.nih.nci.caarray.external.v1_0.data.QuantitationType;
import gov.nih.nci.caarray.external.v1_0.query.DataSetRequest;
import gov.nih.nci.caarray.injection.InjectionInterceptor;
import gov.nih.nci.caarray.magetab.MageTabDocumentSet;
import gov.nih.nci.caarray.magetab.sdrf.ArrayDataFile;
import gov.nih.nci.caarray.magetab.sdrf.ArrayDataMatrixFile;
import gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataFile;
import gov.nih.nci.caarray.magetab.sdrf.DerivedArrayDataMatrixFile;
import gov.nih.nci.caarray.magetab.sdrf.SdrfDocument;
import gov.nih.nci.caarray.services.AuthorizationInterceptor;
import gov.nih.nci.caarray.services.HibernateSessionInterceptor;
import gov.nih.nci.caarray.services.StorageInterceptor;
import gov.nih.nci.caarray.services.external.v1_0.InvalidInputException;
import gov.nih.nci.caarray.services.external.v1_0.InvalidReferenceException;
import gov.nih.nci.caarray.services.external.v1_0.data.DataService;
import gov.nih.nci.caarray.services.external.v1_0.data.DataTransferException;
import gov.nih.nci.caarray.services.external.v1_0.data.InconsistentDataSetsException;
import gov.nih.nci.caarray.services.external.v1_0.impl.BaseV1_0ExternalService;
import gov.nih.nci.caarray.util.CaArrayHibernateHelper;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.annotation.security.PermitAll;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.interceptor.Interceptors;

import org.apache.commons.collections.ListUtils;
import org.apache.commons.configuration.DataConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.jboss.ejb3.annotation.RemoteBinding;
import org.jboss.ejb3.annotation.TransactionTimeout;

import com.fiveamsolutions.nci.commons.data.search.PageSortParams;
import com.google.inject.Inject;
import com.healthmarketscience.rmiio.RemoteInputStreamServer;
import com.healthmarketscience.rmiio.SimpleRemoteInputStream;

/**
 * Stateless session bean implementing version 1.0 of the external {@link DataService} API. It offers three
 * operations: retrieving a merged {@link DataSet} for a set of hybridizations and/or data files, streaming the
 * contents of a single file to a remote client, and exporting an experiment's annotations as MAGE-TAB.
 * <p>
 * Collaborators are supplied through the {@code @Inject}-annotated setters at the bottom of the class. The helpers
 * {@code getRequiredByExternalId}, {@code mapCollection} and {@code mapEntity} are not defined in this class;
 * presumably they are inherited from {@link BaseV1_0ExternalService}.
 *
 * @author dkokotov
 */
@Stateless(name = "DataServicev1_0")
@PermitAll
@RemoteBinding(jndiBinding = DataService.JNDI_NAME)
@TransactionAttribute(TransactionAttributeType.REQUIRED)
@TransactionTimeout(DataServiceBean.TIMEOUT_SECONDS)
@Interceptors({ AuthorizationInterceptor.class, StorageInterceptor.class, HibernateSessionInterceptor.class,
        InjectionInterceptor.class })
@SuppressWarnings("PMD")
public class DataServiceBean extends BaseV1_0ExternalService implements DataService {
    private static final Logger LOG = Logger.getLogger(DataServiceBean.class);

    /** Transaction timeout applied to every method of this bean, in seconds. */
    static final int TIMEOUT_SECONDS = 1800;
    static final long MAX_FILE_REQUEST_SIZE = 1024L * 1024 * 1024; // 1 GB

    private CaArrayHibernateHelper hibernateHelper;
    private ArrayDao arrayDao;
    private DataStorageFacade dataStorageFacade;
    private TemporaryFileCache temporaryFileCache;
    private ParsedDataPersister parsedDataPersister;

    /**
     * {@inheritDoc}
     */
    @Override
    public DataSet getDataSet(DataSetRequest request)
            throws InvalidReferenceException, InvalidInputException, InconsistentDataSetsException {
        LOG.info("Received data retrieval request");
        checkRequest(request);
        final List<gov.nih.nci.caarray.domain.data.DataSet> dataSets = getDataSets(request);
        final gov.nih.nci.caarray.domain.data.DataSet mergedDataSet = createMergedDataSet(dataSets, request);
        LOG.info("Retrieved " + mergedDataSet.getHybridizationDataList().size() + " hybridization data elements, "
                + mergedDataSet.getQuantitationTypes().size() + " quant types");
        final DataSet externalDataSet = toExternalDataSet(mergedDataSet);
        // The external representation has been built; clear the session now that the loaded entities are no
        // longer needed.
        this.hibernateHelper.getCurrentSession().clear();
        return externalDataSet;
    }

    /**
     * Converts a domain data set into its external API representation by mapping its quantitation types, design
     * elements and hybridization data.
     *
     * @param dataSet the domain data set to convert; its design element list must be set
     * @return the newly created external data set
     */
    private DataSet toExternalDataSet(gov.nih.nci.caarray.domain.data.DataSet dataSet) {
        final DataSet externalDataSet = new DataSet();
        mapCollection(dataSet.getQuantitationTypes(), externalDataSet.getQuantitationTypes(),
                QuantitationType.class);
        mapCollection(dataSet.getDesignElementList().getDesignElements(), externalDataSet.getDesignElements(),
                DesignElement.class);
        mapCollection(dataSet.getHybridizationDataList(), externalDataSet.getDatas(),
                gov.nih.nci.caarray.external.v1_0.data.HybridizationData.class);
        return externalDataSet;
    }

    /**
     * Collects the domain data sets of every array data selected by the request.
     *
     * @param request the data set request
     * @return one data set per matching array data, in selection order
     * @throws InvalidReferenceException if the request contains a reference that cannot be resolved
     */
    private List<gov.nih.nci.caarray.domain.data.DataSet> getDataSets(DataSetRequest request)
            throws InvalidReferenceException {
        final List<AbstractArrayData> arrayDatas = getArrayDatas(request);
        final List<gov.nih.nci.caarray.domain.data.DataSet> dataSets = new ArrayList<gov.nih.nci.caarray.domain.data.DataSet>(
                arrayDatas.size());
        for (final AbstractArrayData data : arrayDatas) {
            dataSets.add(data.getDataSet());
        }
        return dataSets;
    }

    /**
     * Builds a single transient data set holding the requested quantitation types, the common design element
     * list, and one hybridization data entry per source hybridization data.
     *
     * @param dataSets the source data sets to merge
     * @param request the data set request naming the quantitation types of interest
     * @return the merged data set
     * @throws InconsistentDataSetsException if the source data sets do not share the same design elements
     * @throws InvalidReferenceException if a quantitation type reference cannot be resolved
     */
    private gov.nih.nci.caarray.domain.data.DataSet createMergedDataSet(
            List<gov.nih.nci.caarray.domain.data.DataSet> dataSets, DataSetRequest request)
            throws InconsistentDataSetsException, InvalidReferenceException {
        // Resolve the requested quantitation types once and reuse them for every hybridization data, instead of
        // repeating the lookups per element.
        final List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes = getQuantitationTypes(
                request);
        final gov.nih.nci.caarray.domain.data.DataSet dataSet = new gov.nih.nci.caarray.domain.data.DataSet();
        dataSet.getQuantitationTypes().addAll(quantitationTypes);
        addDesignElementList(dataSet, dataSets);
        addHybridizationDatas(dataSet, dataSets, quantitationTypes);
        return dataSet;
    }

    /**
     * Sets the design element list of the merged data set: an empty list when there are no source data sets,
     * otherwise the list of the first source data set, provided all source lists are consistent.
     *
     * @param dataSet the merged data set being built
     * @param dataSets the source data sets
     * @throws InconsistentDataSetsException if the source data sets have differing design elements
     */
    private void addDesignElementList(gov.nih.nci.caarray.domain.data.DataSet dataSet,
            List<gov.nih.nci.caarray.domain.data.DataSet> dataSets) throws InconsistentDataSetsException {
        if (dataSets.isEmpty()) {
            dataSet.setDesignElementList(new DesignElementList());
            return;
        }
        if (!allDesignElementListsAreConsistent(dataSets)) {
            throw new InconsistentDataSetsException("The DataSet requested data from inconsistent design elements");
        }
        dataSet.setDesignElementList(dataSets.get(0).getDesignElementList());
    }

    /**
     * Checks that every data set has the same design elements, in the same order, as the first one.
     *
     * @param dataSets the data sets to compare; must not be empty
     * @return true if all design element lists are equal to the first
     */
    private boolean allDesignElementListsAreConsistent(List<gov.nih.nci.caarray.domain.data.DataSet> dataSets) {
        final DesignElementList firstList = dataSets.get(0).getDesignElementList();
        for (int i = 1; i < dataSets.size(); i++) {
            final DesignElementList nextList = dataSets.get(i).getDesignElementList();
            if (!ListUtils.isEqualList(firstList.getDesignElements(), nextList.getDesignElements())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds to the merged data set a copy of every hybridization data found in the source data sets.
     *
     * @param dataSet the merged data set being built
     * @param dataSets the source data sets
     * @param quantitationTypes the resolved quantitation types whose columns should be copied
     */
    private void addHybridizationDatas(gov.nih.nci.caarray.domain.data.DataSet dataSet,
            List<gov.nih.nci.caarray.domain.data.DataSet> dataSets,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        for (final gov.nih.nci.caarray.domain.data.DataSet nextDataSet : dataSets) {
            for (final HybridizationData nextHybridizationData : nextDataSet.getHybridizationDataList()) {
                addHybridizationData(dataSet, nextHybridizationData, quantitationTypes);
            }
        }
    }

    /**
     * Creates a new hybridization data in the merged data set that refers to the same hybridization as the source
     * and carries only the columns of the requested quantitation types.
     *
     * @param dataSet the merged data set being built
     * @param copyFromHybridizationData the source hybridization data
     * @param quantitationTypes the resolved quantitation types whose columns should be copied
     */
    private void addHybridizationData(gov.nih.nci.caarray.domain.data.DataSet dataSet,
            HybridizationData copyFromHybridizationData,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        final HybridizationData hybridizationData = new HybridizationData();
        hybridizationData.setHybridization(copyFromHybridizationData.getHybridization());
        dataSet.getHybridizationDataList().add(hybridizationData);
        copyColumns(hybridizationData, copyFromHybridizationData, quantitationTypes);
        hybridizationData.setDataSet(dataSet);
    }

    /**
     * For each quantitation type, loads the matching source column from storage and adds it to the target
     * hybridization data. The column objects themselves are shared, not cloned.
     *
     * @param hybridizationData the target hybridization data
     * @param copyFromHybridizationData the source hybridization data
     * @param quantitationTypes the quantitation types whose columns should be copied
     */
    private void copyColumns(HybridizationData hybridizationData, HybridizationData copyFromHybridizationData,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        for (final gov.nih.nci.caarray.domain.data.QuantitationType type : quantitationTypes) {
            final AbstractDataColumn column = copyFromHybridizationData.getColumn(type);
            this.parsedDataPersister.loadFromStorage(column);
            hybridizationData.getColumns().add(column);
        }
    }

    /**
     * Validates that the request is present, names at least one hybridization or data file, and names at least
     * one quantitation type.
     *
     * @param request the request to validate
     * @throws InvalidInputException if any of those conditions is not met
     */
    private void checkRequest(DataSetRequest request) throws InvalidInputException {
        if (request == null) {
            throw new InvalidInputException("DataRetrievalRequest was null");
        }
        if (request.getHybridizations().isEmpty() && request.getDataFiles().isEmpty()) {
            throw new InvalidInputException("DataRetrievalRequest didn't specify any Hybridizations or Files");
        }
        if (request.getQuantitationTypes().isEmpty()) {
            throw new InvalidInputException("DataRetrievalRequest didn't specify QuantitationTypes");
        }
    }

    /**
     * Resolves the request's references and gathers the distinct array datas, reachable from the requested
     * hybridizations and data files, that contain all of the requested quantitation types.
     *
     * @param request the data set request
     * @return the matching array datas, hybridization-derived ones first, then file-derived ones
     * @throws InvalidReferenceException if any reference in the request cannot be resolved
     */
    private List<AbstractArrayData> getArrayDatas(DataSetRequest request) throws InvalidReferenceException {
        final List<Hybridization> hybridizations = getHybridizations(request);
        final List<CaArrayFile> files = getFiles(request);
        final List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes = getQuantitationTypes(
                request);
        final List<AbstractArrayData> arrayDatas = new ArrayList<AbstractArrayData>(hybridizations.size());

        for (final Hybridization hybridization : hybridizations) {
            addArrayDatas(arrayDatas, hybridization, quantitationTypes);
        }
        for (final CaArrayFile dataFile : files) {
            addArrayDatas(arrayDatas, dataFile, quantitationTypes);
        }
        return arrayDatas;
    }

    /**
     * Adds the raw and derived array datas of a hybridization that qualify according to
     * {@link #shouldAddData(List, AbstractArrayData, List)}.
     *
     * @param arrayDatas the accumulator to add to
     * @param hybridization the hybridization whose array datas are considered
     * @param quantitationTypes the quantitation types each array data must contain
     */
    private void addArrayDatas(List<AbstractArrayData> arrayDatas, Hybridization hybridization,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        for (final RawArrayData rawArrayData : hybridization.getRawDataCollection()) {
            if (shouldAddData(arrayDatas, rawArrayData, quantitationTypes)) {
                arrayDatas.add(rawArrayData);
            }
        }
        for (final DerivedArrayData derivedArrayData : hybridization.getDerivedDataCollection()) {
            if (shouldAddData(arrayDatas, derivedArrayData, quantitationTypes)) {
                arrayDatas.add(derivedArrayData);
            }
        }
    }

    /**
     * Adds the array data looked up for the given file, if there is one and it qualifies according to
     * {@link #shouldAddData(List, AbstractArrayData, List)}.
     *
     * @param arrayDatas the accumulator to add to
     * @param dataFile the file whose array data is considered
     * @param quantitationTypes the quantitation types the array data must contain
     */
    private void addArrayDatas(List<AbstractArrayData> arrayDatas, CaArrayFile dataFile,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        final AbstractArrayData arrayData = this.arrayDao.getArrayData(dataFile.getId());
        // shouldAddData already rejects a null array data, so no separate null check is needed here.
        if (shouldAddData(arrayDatas, arrayData, quantitationTypes)) {
            arrayDatas.add(arrayData);
        }
    }

    /**
     * Decides whether an array data belongs in the result: it must be non-null, not already collected, and
     * contain every requested quantitation type.
     *
     * @param arrayDatas the array datas collected so far
     * @param arrayData the candidate; may be null
     * @param quantitationTypes the quantitation types the candidate must contain
     * @return true if the candidate should be added
     */
    private boolean shouldAddData(List<AbstractArrayData> arrayDatas, AbstractArrayData arrayData,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        return arrayData != null && !arrayDatas.contains(arrayData)
                && containsAllTypes(arrayData, quantitationTypes);
    }

    /**
     * Checks whether the array data has a data set that includes all the given quantitation types.
     *
     * @param arrayData the array data to inspect
     * @param quantitationTypes the required quantitation types
     * @return true if a data set exists and contains every required type
     */
    private boolean containsAllTypes(AbstractArrayData arrayData,
            List<gov.nih.nci.caarray.domain.data.QuantitationType> quantitationTypes) {
        final gov.nih.nci.caarray.domain.data.DataSet dataSet = arrayData.getDataSet();
        return dataSet != null && dataSet.getQuantitationTypes().containsAll(quantitationTypes);
    }

    /**
     * Resolves the hybridization references of the request to domain objects.
     *
     * @param request the data set request
     * @return the hybridizations, in request order
     * @throws InvalidReferenceException if a reference cannot be resolved
     */
    private List<Hybridization> getHybridizations(DataSetRequest request) throws InvalidReferenceException {
        final List<Hybridization> hybridizations = new ArrayList<Hybridization>(request.getHybridizations().size());
        for (final CaArrayEntityReference hybRef : request.getHybridizations()) {
            hybridizations.add(getRequiredByExternalId(hybRef.getId(), Hybridization.class));
        }
        return hybridizations;
    }

    /**
     * Resolves the data file references of the request to domain objects.
     *
     * @param request the data set request
     * @return the files, in request order
     * @throws InvalidReferenceException if a reference cannot be resolved
     */
    private List<CaArrayFile> getFiles(DataSetRequest request) throws InvalidReferenceException {
        final List<CaArrayFile> files = new ArrayList<CaArrayFile>(request.getDataFiles().size());
        for (final CaArrayEntityReference fileRef : request.getDataFiles()) {
            files.add(getRequiredByExternalId(fileRef.getId(), CaArrayFile.class));
        }
        return files;
    }

    /**
     * Resolves the quantitation type references of the request to domain objects.
     *
     * @param request the data set request
     * @return the quantitation types, in request order
     * @throws InvalidReferenceException if a reference cannot be resolved
     */
    private List<gov.nih.nci.caarray.domain.data.QuantitationType> getQuantitationTypes(DataSetRequest request)
            throws InvalidReferenceException {
        // Size the list from the collection actually being iterated (previously sized from the data files).
        final List<gov.nih.nci.caarray.domain.data.QuantitationType> types = new ArrayList<gov.nih.nci.caarray.domain.data.QuantitationType>(
                request.getQuantitationTypes().size());
        for (final CaArrayEntityReference qtRef : request.getQuantitationTypes()) {
            types.add(
                    getRequiredByExternalId(qtRef.getId(), gov.nih.nci.caarray.domain.data.QuantitationType.class));
        }
        return types;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public FileStreamableContents streamFileContents(CaArrayEntityReference fileRef, boolean compressed)
            throws InvalidReferenceException, DataTransferException {
        final CaArrayFile caarrayFile = getRequiredByExternalId(fileRef.getId(), CaArrayFile.class);
        final FileStreamableContents fsContents = new FileStreamableContents();
        fsContents.setMetadata(mapEntity(caarrayFile, File.class).getMetadata());
        fsContents.setCompressed(compressed);

        InputStream rawStream = null;
        RemoteInputStreamServer istream = null;
        boolean handedOff = false;
        try {
            rawStream = this.dataStorageFacade.openInputStream(caarrayFile.getDataHandle(), compressed);
            // NOTE(review): the configured chunk size is read but never passed to the remote stream. Presumably
            // it was meant for the chunk-size constructor of SimpleRemoteInputStream - confirm before wiring it
            // in. The lookup is kept so that a missing configuration value still fails the request as before.
            final DataConfiguration config = ConfigurationHelper.getConfiguration();
            final int packetSize = config.getInt(ConfigParamEnum.FILE_RETRIEVAL_API_CHUNK_SIZE.name());
            istream = new SimpleRemoteInputStream(new BufferedInputStream(rawStream));
            fsContents.setContentStream(istream.export());
            // after all the hard work, hand over responsibility for the stream to the client
            handedOff = true;
            return fsContents;
        } catch (final Exception e) {
            LOG.warn("Could not create input stream for file contents", e);
            throw new DataTransferException("Could not create input stream for file contents");
        } finally {
            // we will only close the stream here if the server fails before
            // returning an exported stream
            if (!handedOff) {
                // ARRAY-1958: close out temp files, if any
                closeAfterFailedExport(istream, rawStream);
            }
        }
    }

    /**
     * Releases the streams opened by a failed {@code streamFileContents} call. When the remote stream server was
     * created it is closed (it wraps the raw stream); otherwise the raw stream, if it was opened, is closed
     * directly so it does not leak.
     *
     * @param istream the remote stream server, or null if it was never created
     * @param rawStream the underlying input stream, or null if it was never opened
     */
    private void closeAfterFailedExport(RemoteInputStreamServer istream, InputStream rawStream) {
        if (istream != null) {
            istream.close();
        } else if (rawStream != null) {
            try {
                rawStream.close();
            } catch (final IOException e) {
                LOG.warn("Could not close input stream for file contents", e);
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public MageTabFileSet exportMageTab(CaArrayEntityReference experimentRef)
            throws InvalidReferenceException, DataTransferException {
        final gov.nih.nci.caarray.domain.project.Experiment experiment = getRequiredByExternalId(
                experimentRef.getId(), gov.nih.nci.caarray.domain.project.Experiment.class);
        try {
            final MageTabFileSet mageTabSet = new MageTabFileSet();
            final MageTabDocumentSet docSet = exportToMageTab(experiment);
            // Only the first IDF and first SDRF document of the exported set are returned.
            mageTabSet.setIdf(toFileContents(docSet.getIdfDocuments().iterator().next().getFile().getAsFile(),
                    FileTypeRegistry.MAGE_TAB_IDF));
            mageTabSet.setSdrf(toFileContents(docSet.getSdrfDocuments().iterator().next().getFile().getAsFile(),
                    FileTypeRegistry.MAGE_TAB_SDRF));

            final List<CaArrayFile> dataFiles = getDataFilesReferencedByMageTab(docSet, experiment);
            mapCollection(dataFiles, mageTabSet.getDataFiles(), File.class);

            return mageTabSet;
        } catch (final IOException e) {
            LOG.error("Error exporting to MAGE-TAB", e);
            throw new DataTransferException("Could not generate idf/sdrf");
        }
    }

    /**
     * Exports the experiment to MAGE-TAB, writing the IDF and SDRF into files created in the temporary file cache
     * and named after the experiment's public identifier.
     *
     * @param experiment the experiment to export
     * @return the document set produced by the MAGE-TAB exporter
     */
    private MageTabDocumentSet exportToMageTab(Experiment experiment) {
        final String baseFileName = experiment.getPublicIdentifier();
        final java.io.File idfFile = this.temporaryFileCache.createFile(baseFileName + ".idf");
        final java.io.File sdrfFile = this.temporaryFileCache.createFile(baseFileName + ".sdrf");
        // Translate the experiment and export to the temporary files.
        return ServiceLocatorFactory.getMageTabExporter().exportToMageTab(experiment, idfFile, sdrfFile);
    }

    /**
     * Finds the files of the experiment's project whose names are referenced by the SDRF documents of the given
     * MAGE-TAB document set (array data, derived array data and both kinds of data matrix files).
     *
     * @param mageTab the exported MAGE-TAB document set
     * @param experiment the experiment that was exported
     * @return the referenced project files, sorted by name
     */
    private List<CaArrayFile> getDataFilesReferencedByMageTab(MageTabDocumentSet mageTab, Experiment experiment) {
        final List<String> fileNames = new ArrayList<String>();
        for (final SdrfDocument sdrfDoc : mageTab.getSdrfDocuments()) {
            for (final ArrayDataFile file : sdrfDoc.getAllArrayDataFiles()) {
                fileNames.add(file.getName());
            }
            for (final DerivedArrayDataFile file : sdrfDoc.getAllDerivedArrayDataFiles()) {
                fileNames.add(file.getName());
            }
            for (final ArrayDataMatrixFile file : sdrfDoc.getAllArrayDataMatrixFiles()) {
                fileNames.add(file.getName());
            }
            for (final DerivedArrayDataMatrixFile file : sdrfDoc.getAllDerivedArrayDataMatrixFiles()) {
                fileNames.add(file.getName());
            }
        }
        final PageSortParams<CaArrayFile> sortByName = new PageSortParams<CaArrayFile>(-1, 0,
                new AdHocSortCriterion<CaArrayFile>("name"), false);
        return ServiceLocatorFactory.getGenericDataService().pageAndFilterCollection(
                experiment.getProject().getFiles(), "name", fileNames, sortByName);
    }

    /**
     * Reads a generated MAGE-TAB file fully into memory and wraps it, uncompressed, together with its metadata.
     *
     * @param mageTabFile the file on disk to read
     * @param fileType the domain file type to record in the metadata
     * @return the file contents with name, uncompressed size and file type populated; the compressed size is
     *         set to -1
     * @throws IOException if the file cannot be read
     */
    private FileContents toFileContents(java.io.File mageTabFile, FileType fileType) throws IOException {
        final FileMetadata metadata = new FileMetadata();
        metadata.setName(mageTabFile.getName());
        metadata.setUncompressedSize(mageTabFile.length());
        metadata.setCompressedSize(-1);
        metadata.setFileType(mapEntity(fileType, gov.nih.nci.caarray.external.v1_0.data.FileType.class));

        final FileContents fc = new FileContents();
        fc.setContents(FileUtils.readFileToByteArray(mageTabFile));
        fc.setCompressed(false);
        fc.setMetadata(metadata);
        return fc;
    }

    /**
     * @param hibernateHelper the hibernateHelper to set
     */
    @Inject
    public void setHibernateHelper(CaArrayHibernateHelper hibernateHelper) {
        this.hibernateHelper = hibernateHelper;
    }

    /**
     * @param arrayDao the arrayDao to set
     */
    @Inject
    public void setArrayDao(ArrayDao arrayDao) {
        this.arrayDao = arrayDao;
    }

    /**
     * @param dataStorageFacade the dataStorageFacade to set
     */
    @Inject
    public void setDataStorageFacade(DataStorageFacade dataStorageFacade) {
        this.dataStorageFacade = dataStorageFacade;
    }

    /**
     * @param temporaryFileCache the temporaryFileCache to set
     */
    @Inject
    public void setTemporaryFileCache(TemporaryFileCache temporaryFileCache) {
        this.temporaryFileCache = temporaryFileCache;
    }

    /**
     * @param parsedDataPersister the parsedDataPersister to set
     */
    @Inject
    public void setParsedDataPersister(ParsedDataPersister parsedDataPersister) {
        this.parsedDataPersister = parsedDataPersister;
    }
}