org.ensembl.gti.seqstore.database.cramstore.EnaCramSubmitter.java Source code

Java tutorial

Introduction

Here is the source code for org.ensembl.gti.seqstore.database.cramstore.EnaCramSubmitter.java

Source

/*
 * Copyright 2015 EMBL-European Bioinformatics Institute
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ensembl.gti.seqstore.database.cramstore;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContextBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Class taking care of submission of already uploaded files to ENA.
 * <p>
 * For a collection of local files it writes an analysis XML document listing
 * each file name with its MD5 checksum, writes a submission XML document that
 * references the analysis XML, posts both as a multipart request to the
 * configured URI, checks the returned receipt and, if the receipt reports
 * success, deletes the local files.
 *
 * @author dstaines
 *
 */
public class EnaCramSubmitter {

    private static final String SUBMISSION_TEMPLATE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<SUBMISSION_SET xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
            + "xsi:noNamespaceSchemaLocation=\"ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.submission.xsd\">"
            + "<SUBMISSION alias=\"{0}\"  center_name=\"{1}\">"
            + "<ACTIONS><ACTION><ADD source=\"{2}\" schema=\"analysis\"/></ACTION>"
            + "<ACTION><RELEASE/></ACTION></ACTIONS></SUBMISSION></SUBMISSION_SET>";
    private static final String ANALYSIS_TEMPLATE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<ANALYSIS_SET><ANALYSIS alias=\"{0}\" ><TITLE>{1}</TITLE><DESCRIPTION>{1}</DESCRIPTION>"
            + "<ANALYSIS_TYPE><REFERENCE_SEQUENCE/></ANALYSIS_TYPE>"
            + "<FILES>{2}</FILES></ANALYSIS></ANALYSIS_SET>";
    private static final String FILE_TEMPLATE = "<FILE filename=\"{0}\" "
            + "filetype=\"fasta\" checksum_method=\"MD5\" checksum=\"{1}\"/>";

    // Only the patterns are shared: SimpleDateFormat instances are not safe for
    // concurrent use, so a fresh formatter is created for every call.
    private static final String NUMERIC_DATE_PATTERN = "yyyyMMddHHmmss";
    private static final String TEXT_DATE_PATTERN = "yyyy/MM/dd HH:mm:ss";

    // Both XML templates declare encoding="UTF-8", so the files must be written as UTF-8.
    private static final String XML_CHARSET = "UTF-8";

    /**
     * Creates a new XPath evaluator. XPath objects are not thread-safe, so one
     * is created per evaluation instead of sharing a static instance.
     */
    private static XPath newXPath() {
        return XPathFactory.newInstance().newXPath();
    }

    /**
     * Returns the value of an attribute from the first element in the document
     * that has the given tag name and carries that attribute.
     *
     * @param doc  document to search
     * @param tag  element name, inserted verbatim into the XPath expression
     * @param attr attribute name, inserted verbatim into the XPath expression
     * @return the attribute value, or {@code null} if no such element exists
     * @throws EnaSubmissionException if the XPath expression cannot be compiled or evaluated
     */
    protected static String getElemAttrib(Document doc, String tag, String attr) {
        try {
            XPathExpression expr = newXPath().compile("//" + tag + "[@" + attr + "]");
            Node element = (Node) expr.evaluate(doc, XPathConstants.NODE);
            if (element == null) {
                return null;
            }
            Node attribute = element.getAttributes().getNamedItem(attr);
            return attribute == null ? null : attribute.getTextContent();
        } catch (XPathExpressionException e) {
            throw new EnaSubmissionException("Could not parse submission receipt", e);
        }
    }

    /**
     * Builds the HTTP client used for submissions.
     *
     * @return a client whose SSL context is built with {@link TrustSelfSignedStrategy}
     * @throws RuntimeException if the SSL context cannot be created
     */
    protected static HttpClient getHttpsClient() {
        try {
            SSLContextBuilder builder = new SSLContextBuilder();
            // NOTE(review): TrustSelfSignedStrategy accepts self-signed server certificates.
            // Left unchanged because existing deployments may rely on it - confirm it is still needed.
            builder.loadTrustMaterial(null, new TrustSelfSignedStrategy());
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(builder.build());
            return HttpClients.custom().setSSLSocketFactory(sslsf).build();
        } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tests whether a receipt reports a successful submission.
     *
     * @param doc parsed receipt
     * @return {@code true} only if the root {@code RECEIPT} element has
     *         {@code success="true"}; {@code false} if the attribute has another
     *         value or the element/attribute is missing
     * @throws EnaSubmissionException if the XPath expression cannot be compiled or evaluated
     */
    protected static boolean isSuccess(Document doc) {
        try {
            XPathExpression expr = newXPath().compile("/RECEIPT[@success]");
            Node receipt = (Node) expr.evaluate(doc, XPathConstants.NODE);
            if (receipt == null) {
                // No RECEIPT root with a success attribute: treat as failure rather than NPE.
                return false;
            }
            return "true".equals(receipt.getAttributes().getNamedItem("success").getTextContent());
        } catch (XPathExpressionException e) {
            throw new EnaSubmissionException("Could not parse submission receipt", e);
        }
    }

    /**
     * Collects the text content of every element with the given tag name.
     *
     * @param doc document to search
     * @param tag element name, inserted verbatim into the XPath expression
     * @return text content of each matching element in document order; empty if none match
     * @throws EnaSubmissionException if the XPath expression cannot be compiled or evaluated
     */
    protected static List<String> getElems(Document doc, String tag) {
        try {
            XPathExpression expr = newXPath().compile("//" + tag);
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            List<String> elems = new ArrayList<>(nodes.getLength());
            for (int i = 0; i < nodes.getLength(); i++) {
                elems.add(nodes.item(i).getTextContent());
            }
            return elems;
        } catch (XPathExpressionException e) {
            throw new EnaSubmissionException("Could not parse submission receipt", e);
        }
    }

    /**
     * Parses a receipt string into a DOM document.
     *
     * @param receipt receipt XML as text
     * @return the parsed document
     * @throws EnaSubmissionException if the parser cannot be configured or the receipt is not
     *                                well-formed XML (including when it contains a DOCTYPE)
     */
    protected static Document parseReceipt(String receipt) {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // The receipt comes from a remote server: refuse DTDs so that external
            // entities cannot be resolved (XXE).
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            // Parse from characters so the result does not depend on re-encoding the
            // string with the platform default charset.
            return factory.newDocumentBuilder().parse(new InputSource(new StringReader(receipt)));
        } catch (UnsupportedOperationException | SAXException | ParserConfigurationException | IOException e) {
            throw new EnaSubmissionException("Could not parse submission receipt", e);
        }
    }

    private final Logger log = LoggerFactory.getLogger(this.getClass());
    private final HttpClient httpClient;
    private final String submitUri;
    private final String centre;
    private final String user;
    private final String password;

    /**
     * @param submitUri submission URI; the literal tokens {@code USERNAME} and
     *                  {@code PASSWORD} in it are replaced with the credentials
     * @param centre    centre name, used in the submission alias, title and
     *                  {@code center_name} attribute
     * @param user      value substituted for {@code USERNAME}
     * @param password  value substituted for {@code PASSWORD}
     */
    public EnaCramSubmitter(String submitUri, String centre, String user, String password) {
        this.httpClient = getHttpsClient();
        this.submitUri = submitUri;
        this.centre = centre;
        this.user = user;
        this.password = password;
    }

    /**
     * Builds the alias shared by the analysis and submission documents:
     * the centre name, an underscore and the numeric timestamp.
     */
    private String getAlias(Date date) {
        return centre + "_" + new SimpleDateFormat(NUMERIC_DATE_PATTERN).format(date);
    }

    /**
     * Writes the analysis XML for the given files to a new temporary file.
     *
     * @param date  timestamp used for the alias and title
     * @param files files to list; each one is read in full to compute its MD5 checksum
     * @return the temporary file containing the analysis XML
     * @throws IOException if a file cannot be read or the XML cannot be written
     */
    private File getAnalysisXml(Date date, Collection<File> files) throws IOException {
        StringBuilder fileString = new StringBuilder();
        for (File file : files) {
            log.info("Checksumming {}", file.getName());
            String md5;
            // md5Hex does not close the stream it is given, so close it here.
            try (FileInputStream in = new FileInputStream(file)) {
                md5 = DigestUtils.md5Hex(in);
            }
            fileString.append(MessageFormat.format(FILE_TEMPLATE, file.getName(), md5));
        }
        String subId = getAlias(date);
        log.info("Writing analysis as {}", subId);
        String title = centre + " sequence submission " + new SimpleDateFormat(TEXT_DATE_PATTERN).format(date);
        String xml = MessageFormat.format(ANALYSIS_TEMPLATE, subId, title, fileString.toString());
        log.info("Analysis XML: {}", xml);
        File xmlFile = File.createTempFile("ena_analysis_", ".xml");
        FileUtils.write(xmlFile, xml, XML_CHARSET);
        return xmlFile;
    }

    /**
     * Writes the submission XML that references the given analysis XML file
     * (by file name) to a new temporary file.
     *
     * @param date timestamp used for the alias
     * @param xml  analysis XML file
     * @return the temporary file containing the submission XML
     * @throws IOException if the XML cannot be written
     */
    private File getSubmissionXml(Date date, File xml) throws IOException {
        String subId = getAlias(date);
        log.info("Writing submission as {}", subId);
        String subXml = MessageFormat.format(SUBMISSION_TEMPLATE, subId, centre, xml.getName());
        log.info("Sub XML: {}", subXml);
        File xmlFile = File.createTempFile("enasub_", ".xml");
        FileUtils.write(xmlFile, subXml, XML_CHARSET);
        return xmlFile;
    }

    /**
     * Submits the given files, doing nothing when the collection is empty.
     *
     * @param files files to submit
     * @throws EnaSubmissionException if the submission fails
     */
    protected void submitFiles(Collection<File> files) {
        if (!files.isEmpty()) {
            log.info("Submitting {} files", files.size());
            submitXml(files);
        }
    }

    /**
     * Generates the analysis and submission XML for the given files, posts them
     * to the submission URI and checks the receipt. The files are deleted only
     * after the receipt reports success.
     *
     * @param files files to submit
     * @throws EnaSubmissionException if the response status is not 200, the response has no
     *                                body, the receipt cannot be parsed or reports failure,
     *                                or an I/O error occurs
     */
    protected void submitXml(Collection<File> files) {
        // replace() is literal; replaceAll() would interpret '$' and '\' in the
        // credentials as regex replacement syntax.
        HttpPost post = new HttpPost(submitUri.replace("USERNAME", user).replace("PASSWORD", password));
        try {
            Date date = new Date();
            File xml = getAnalysisXml(date, files);
            log.info("XML written to {}", xml.getPath());
            File subXml = getSubmissionXml(date, xml);

            HttpEntity entity = MultipartEntityBuilder.create().addPart("ANALYSIS", new FileBody(xml))
                    .addPart("SUBMISSION", new FileBody(subXml)).build();
            post.setEntity(entity);

            log.info("Uploading XML as post");
            HttpResponse response = httpClient.execute(post);
            if (response.getStatusLine().getStatusCode() != 200) {
                throw new EnaSubmissionException(
                        "Could not submit XML to ENA: " + response.getStatusLine().toString());
            }
            log.info("Status: {}", response.getStatusLine());
            HttpEntity receiptEntity = response.getEntity();
            if (receiptEntity == null) {
                throw new EnaSubmissionException("Could not submit XML to ENA: response has no body");
            }
            // NOTE(review): assumes the receipt is UTF-8, like the XML sent - confirm against ENA.
            String receipt = IOUtils.toString(receiptEntity.getContent(), XML_CHARSET);
            log.info("Receipt: {}", receipt);
            Document receiptDoc = parseReceipt(receipt);

            if (!isSuccess(receiptDoc)) {
                String msg = "Submission failed: " + StringUtils.join(getElems(receiptDoc, "ERROR"), "; ");
                log.error(msg);
                throw new EnaSubmissionException(msg);
            }
            log.info("Successfully submitted to ENA with analysis accession {} and submission accession {}",
                    getElemAttrib(receiptDoc, "ANALYSIS", "accession"),
                    getElemAttrib(receiptDoc, "SUBMISSION", "accession"));

            for (File file : files) {
                log.info("Deleting file {}", file);
                if (!file.delete()) {
                    log.warn("Could not delete file {}", file);
                }
            }
            log.info("Completed submitting XML");
        } catch (IOException | UnsupportedOperationException e) {
            throw new EnaSubmissionException("Unexpected error during file submission", e);
        } finally {
            // Hand the connection back on every path, including error responses
            // whose body is never read.
            post.releaseConnection();
        }
    }

}