eu.scape_project.planning.xml.ProjectExportAction.java Source code

Java tutorial

Introduction

Here is the source code for eu.scape_project.planning.xml.ProjectExportAction.java

Source

/*******************************************************************************
 * Copyright 2006 - 2012 Vienna University of Technology,
 * Department of Software Technology and Interactive Systems, IFS
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package eu.scape_project.planning.xml;

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import javax.inject.Inject;
import javax.persistence.EntityManager;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.WriterOutputStream;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.XPath;
import org.dom4j.io.DocumentSource;
import org.slf4j.Logger;

import eu.scape_project.planning.exception.PlanningException;
import eu.scape_project.planning.manager.DigitalObjectManager;
import eu.scape_project.planning.manager.StorageException;
import eu.scape_project.planning.model.DigitalObject;
import eu.scape_project.planning.model.Plan;
import eu.scape_project.planning.model.PlanProperties;
import eu.scape_project.planning.utils.FileUtils;
import eu.scape_project.planning.utils.OS;

/**
 * This class provides methods to export plans from the database to their XML representation. 
 * 
 * @author Christoph Becker
 */
public class ProjectExportAction implements Serializable {
    /** Serialization version identifier of this {@link Serializable} class. */
    private static final long serialVersionUID = 2155152208617526555L;

    /**
     * Boundary of data to load before calling the garbage collector.
     * The export loops compare the accumulated {@code getData().getSize()}
     * values of the loaded digital objects against this value (presumably
     * bytes, i.e. 200 MiB - TODO confirm the unit of getSize()).
     */
    private static final int LOADED_DATA_SIZE_BOUNDARY = 200 * 1024 * 1024;

    /** Logger used for progress and error messages; injected. */
    @Inject
    private Logger log;

    /** Entity manager used to query plans and to look up digital objects; injected. */
    @Inject
    private EntityManager em;

    /** Provides data-filled copies of digital objects; injected. */
    @Inject
    protected DigitalObjectManager digitalObjectManager;

    /**
     * Directory (with trailing "/") that the most recent zip export was
     * written to, or {@code null} as long as no export has been started and
     * no path has been set explicitly.
     */
    private String lastProjectExportPath;

    /**
     * Creates an export action that has not produced any export yet, so the
     * last export path starts out as {@code null}.
     */
    public ProjectExportAction() {
        this.lastProjectExportPath = null;
    }

    /**
     * Exports all projects into separate xml files and adds them to a zip
     * archive. The plans are processed ordered by the id of their
     * {@link PlanProperties}.
     * 
     * @return False if writing the archive failed with an I/O error, true
     *         otherwise (also when there is nothing to export).
     */
    public boolean exportAllProjectsToZip() {
        // Typed query (as already used in exportComplete): no unchecked cast
        // and no warning suppression needed.
        List<PlanProperties> ppList = em.createQuery("select p from PlanProperties p order by p.id",
                PlanProperties.class).getResultList();

        return exportPPListToZip(ppList);
    }

    /**
     * Exports all plans where the {@link PlanProperties#getId()} is in the
     * given range [fromID, toID] (inclusive) and adds them to a zip archive.
     * The plans are processed ordered by the id of their
     * {@link PlanProperties}.
     * 
     * @param fromID
     *            lowest PlanProperties id to export (inclusive)
     * @param toID
     *            highest PlanProperties id to export (inclusive)
     * @return False if writing the archive failed with an I/O error, true
     *         otherwise (also when no plan lies in the given range).
     */
    public boolean exportSomeProjectsToZip(int fromID, int toID) {
        // Typed query (as already used in exportComplete): no unchecked cast
        // and no warning suppression needed. The JPQL text is unchanged.
        List<PlanProperties> ppList = em
                .createQuery("select p.planProperties from Plan p where " + " p.planProperties.id >= :fromID "
                        + " and p.planProperties.id <= :toID order by p.planProperties.id", PlanProperties.class)
                .setParameter("fromID", fromID).setParameter("toID", toID).getResultList();

        return exportPPListToZip(ppList);
    }

    /**
     * Exports the plan that belongs to the PlanProperties with id ppid and
     * writes the resulting XML document - including all binary data - to the
     * given OutputStream (currently required by
     * {@link #exportAllProjectsToZip()}).
     * <p>
     * The binary data is first dumped to temporary files below baseTempPath
     * and then merged into the document by an XSLT transformation. These
     * temporary files are NOT removed by this method.
     * 
     * @param ppid
     *            id of the PlanProperties whose plan shall be exported
     * @param out
     *            stream the complete plan XML is written to; it is not closed
     *            here
     * @param baseTempPath
     *            used to write temp files for binary data, must not be used by
     *            other exports at the same time. File names are appended to
     *            this string directly, so it has to end with a path separator.
     * @return True if export was successful, false otherwise.
     */
    public boolean exportComplete(int ppid, OutputStream out, String baseTempPath) {
        ProjectExporter exporter = new ProjectExporter();
        Document doc = exporter.createProjectDoc();

        Plan plan;
        try {
            plan = em.createQuery("select p from Plan p where p.planProperties.id = :ppid ", Plan.class)
                    .setParameter("ppid", ppid).getSingleResult();
        } catch (Exception e) {
            log.error("Could not load planProperties: ", e);
            log.debug("Skipping the export of the plan with properties " + ppid + ": Couldnt load.");
            return false;
        }

        boolean exported = false;
        try {
            // make sure the directory for the temporary data files exists
            new File(baseTempPath).mkdirs();

            exporter.addProject(plan, doc, false);

            // The document only references the data by id so far: dump the
            // base 64 encoded binary data ...
            List<Integer> binaryObjectIds = getBinaryObjectIds(doc);
            writeBinaryObjects(binaryObjectIds, baseTempPath);
            // ... and the preservation action plans to temp files ...
            List<Integer> preservationActionPlanIDs = getPreservationActionPlanIds(doc);
            writeDigitalObjects(preservationActionPlanIDs, baseTempPath);
            // ... and let the XSLT get the DATA into the PLANS
            addBinaryData(doc, out, baseTempPath);

            exported = true;
        } catch (IOException e) {
            log.error("Could not open outputstream.", e);
        } catch (TransformerException e) {
            log.error("failed to generate export file.", e);
        } catch (StorageException e) {
            log.error("Could not load object from stoarge.", e);
        } catch (PlanningException e) {
            log.error("Could not export plan.", e);
        } finally {
            // Clean up: release the loaded plan before asking for a GC run
            plan = null;

            em.clear();
            System.gc();
        }

        return exported;
    }

    /**
     * Returns a list of object IDs that are stored in the document without
     * binary data, i.e. the content of all <code>data</code> elements which
     * are flagged with <code>hasData='true'</code> and contain a number
     * instead of the data itself.
     * 
     * @param doc
     *            the document to search
     * @return a list of IDs, in document order
     * @throws NumberFormatException
     *             if an element contains a number that is not an integer
     */
    private List<Integer> getBinaryObjectIds(Document doc) {

        // number(.) = number(.) only fails for NaN, so this selects the data
        // elements that have data and a number as content
        XPath xpath = doc.createXPath("//plato:data[@hasData='true' and number(.) = number(.)]");

        Map<String, String> namespaceMap = new HashMap<String, String>();
        namespaceMap.put("plato", PlanXMLConstants.PLATO_NS);
        xpath.setNamespaceURIs(namespaceMap);

        @SuppressWarnings("unchecked")
        List<Element> elements = xpath.selectNodes(doc);

        List<Integer> objectIds = new ArrayList<Integer>(elements.size());
        for (Element element : elements) {
            // XPath number() ignores surrounding whitespace, Integer.parseInt
            // does not - trim to accept everything the expression selected.
            objectIds.add(Integer.parseInt(element.getStringValue().trim()));
        }
        return objectIds;
    }

    /**
     * Returns the IDs found in the <code>preservationActionPlan</code>
     * elements of the document, i.e. the content of all such elements that
     * contain a number instead of the data itself.
     * 
     * @param doc
     *            the document to search
     * @return a list of IDs, in document order
     * @throws NumberFormatException
     *             if an element contains a number that is not an integer
     */
    private List<Integer> getPreservationActionPlanIds(Document doc) {
        // number(.) = number(.) only fails for NaN, so this selects the
        // preservationActionPlan elements that have a number as content
        XPath xpath = doc.createXPath("//plato:preservationActionPlan[number(.) = number(.)]");

        Map<String, String> namespaceMap = new HashMap<String, String>();
        namespaceMap.put("plato", PlanXMLConstants.PLATO_NS);
        xpath.setNamespaceURIs(namespaceMap);

        @SuppressWarnings("unchecked")
        List<Element> elements = xpath.selectNodes(doc);

        List<Integer> objectIds = new ArrayList<Integer>(elements.size());
        for (Element element : elements) {
            // XPath number() ignores surrounding whitespace, Integer.parseInt
            // does not - trim to accept everything the expression selected.
            objectIds.add(Integer.parseInt(element.getStringValue().trim()));
        }
        return objectIds;
    }

    /**
     * Writes the digital objects of the provided objectIds to the tempDir as
     * files. The raw bytes of each object are written to
     * <code>tempDir + id + ".xml"</code>. Objects without data do not get a
     * file, and neither do IDs for which no object can be found.
     * 
     * @param objectIds
     *            the IDs of the objects to write
     * @param tempDir
     *            a temporary directory where the files will be written; the
     *            file name is appended directly, so it has to end with a path
     *            separator
     * @throws IOException
     *             if an error occurred during write
     * @throws StorageException
     *             if the objects could not be loaded
     */
    private void writeDigitalObjects(List<Integer> objectIds, String tempDir) throws IOException, StorageException {
        // long, so that summing up large objects cannot wrap around before
        // the boundary check
        long loadedSize = 0;
        int skip = 0;
        log.info("Writing bytestreams of digital objects. Size = " + objectIds.size());
        for (Integer id : objectIds) {
            if (loadedSize > LOADED_DATA_SIZE_BOUNDARY) { // Call GC if unused data
                                                          // exceeds boundary
                System.gc();
                loadedSize = 0;
            }
            DigitalObject object = em.find(DigitalObject.class, id);
            if (object == null) {
                // em.find() yields null for unknown IDs; treat a dangling
                // reference like an object without data instead of aborting
                // the export with a NullPointerException
                log.warn("Digital object " + id + " could not be found, no file is written for it.");
                continue;
            }
            if (object.isDataExistent()) {
                loadedSize += object.getData().getSize();
                File f = new File(tempDir + object.getId() + ".xml");
                DigitalObject dataFilledObject = digitalObjectManager.getCopyOfDataFilledDigitalObject(object);
                FileOutputStream out = new FileOutputStream(f);
                try {
                    out.write(dataFilledObject.getData().getData());
                } finally {
                    out.close();
                }
            } else {
                skip++;
            }
        }
        // detach everything that was loaded and give the memory back
        em.clear();
        System.gc();
        log.info("Finished writing bytestreams of digital objects. Skipped empty objects: " + skip);
    }

    /**
     * Helper method that was refactored from
     * {@link #exportAllProjectsToZip()}. It takes a list of
     * {@link PlanProperties} and exports the corresponding plans to the zip
     * file <code>allprojects.zip</code> in a new, time-stamped directory below
     * the temp path, which is remembered in lastProjectExportPath.
     * <p>
     * Each plan gets its own zip entry. After a successful run an empty file
     * <code>finished.info</code> is created next to the archive; if an I/O
     * error occurs, a file <code>error.info</code> containing the error
     * message is written instead. The temporary binary data directory is
     * removed in any case.
     * <p>
     * A plan that cannot be exported does not stop the export: a warning is
     * logged, its zip entry may be empty or incomplete, and the remaining
     * plans are still processed.
     * 
     * @param ppList
     *            {@link PlanProperties} for plans to export
     * 
     * @return False if an I/O error occurred while generating the archive,
     *         true otherwise (also when the list is empty and nothing is
     *         written).
     */
    private boolean exportPPListToZip(List<PlanProperties> ppList) {
        if (ppList.isEmpty()) {
            return true;
        }

        log.debug("number of plans to export: " + ppList.size());
        String filename = "allprojects.zip";

        lastProjectExportPath = OS.getTmpPath() + "export" + System.currentTimeMillis() + "/";
        new File(lastProjectExportPath).mkdirs();

        String binarydataTempPath = lastProjectExportPath + "binarydata/";
        File binarydataTempDir = new File(binarydataTempPath);
        binarydataTempDir.mkdirs();

        try {
            OutputStream out = new BufferedOutputStream(new FileOutputStream(lastProjectExportPath + filename));
            try {
                ZipOutputStream zipOut = new ZipOutputStream(out);
                try {
                    for (PlanProperties pp : ppList) {
                        log.debug("EXPORTING: " + pp.getName());
                        ZipEntry zipAdd = new ZipEntry(String.format("%1$03d", pp.getId()) + "-"
                                + FileUtils.makeFilename(pp.getName()) + ".xml");
                        zipOut.putNextEntry(zipAdd);
                        // export the complete project, including binary data
                        if (!exportComplete(pp.getId(), zipOut, binarydataTempPath)) {
                            // do not hide the failure: the entry exists in
                            // the archive but is empty or truncated
                            log.warn("Plan with properties id " + pp.getId()
                                    + " could not be exported, its zip entry is empty or incomplete.");
                        }
                        zipOut.closeEntry();
                    }
                } finally {
                    // finishes the archive, also if a plan export threw
                    zipOut.close();
                }
            } finally {
                // releases the file handle even if finishing the zip failed;
                // closing twice is harmless
                out.close();
            }
            new File(lastProjectExportPath + "finished.info").createNewFile();

            log.info("Export was written to: " + lastProjectExportPath);
        } catch (IOException e) {
            log.error("An error occured while generating the export file.", e);
            File errorInfo = new File(lastProjectExportPath + "error.info");
            try {
                Writer w = new FileWriter(errorInfo);
                try {
                    w.write("An error occured while generating the export file:");
                    // getMessage() may be null, which Writer.write(String)
                    // does not accept
                    w.write(String.valueOf(e.getMessage()));
                } finally {
                    w.close();
                }
            } catch (IOException e1) {
                log.error("Could not write error file.", e1);
            }

            return false;
        } finally {
            // remove all binary temp files
            OS.deleteDirectory(binarydataTempDir);
        }

        return true;
    }

    /**
     * Performs XSLT transformation to get the data into the plans. The
     * stylesheet <code>data/xslt/bytestreams.xsl</code> is loaded from the
     * classpath and receives the temporary directory as parameter
     * <code>tempDir</code>.
     * 
     * @param doc
     *            the plan document
     * @param out
     *            output stream to write the transformed plan XML; it is not
     *            closed here
     * @param tempDir
     *            temporary directory where the data files are located
     * @throws TransformerException
     *             if the stylesheet cannot be found or an error occured
     *             during transformation
     */
    private void addBinaryData(Document doc, OutputStream out, String tempDir) throws TransformerException {
        InputStream xsl = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("data/xslt/bytestreams.xsl");
        if (xsl == null) {
            // fail with a clear message instead of an obscure error from the
            // transformer factory
            throw new TransformerException("Stylesheet data/xslt/bytestreams.xsl was not found on the classpath.");
        }

        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();

            Transformer transformer = transformerFactory.newTransformer(new StreamSource(xsl));
            transformer.setParameter("tempDir", tempDir);

            Source xmlSource = new DocumentSource(doc);
            Result outputTarget = new StreamResult(out);

            log.debug("starting bytestream transformation ...");
            transformer.transform(xmlSource, outputTarget);
            log.debug("FINISHED bytestream transformation!");
        } finally {
            // the stylesheet stream is owned by this method
            try {
                xsl.close();
            } catch (IOException e) {
                log.warn("Could not close stylesheet stream.", e);
            }
        }
    }

    /**
     * Loads all binary data for the given digital objects and dumps it to XML
     * files, located in aTempDir. Each object is written base 64 encoded to
     * <code>aTempDir + id + ".xml"</code>. Objects without data do not get a
     * file, and neither do IDs for which no object can be found.
     * 
     * @param objectIds
     *            the IDs of the objects to write
     * @param aTempDir
     *            a temporary directory where the files will be written; the
     *            file name is appended directly, so it has to end with a path
     *            separator
     * @throws IOException
     *             if an error occurred during write
     * @throws StorageException
     *             if the objects could not be loaded
     */
    private void writeBinaryObjects(List<Integer> objectIds, String aTempDir) throws IOException, StorageException {
        // long, so that summing up large objects cannot wrap around before
        // the boundary check
        long loadedSize = 0;
        int skip = 0;
        log.info("writing XMLs for bytestreams of digital objects. count = " + objectIds.size());
        for (Integer id : objectIds) {
            if (loadedSize > LOADED_DATA_SIZE_BOUNDARY) { // Call GC if unused data
                                                          // exceeds boundary
                System.gc();
                loadedSize = 0;
            }
            DigitalObject object = em.find(DigitalObject.class, id);
            if (object == null) {
                // em.find() yields null for unknown IDs; treat a dangling
                // reference like an object without data instead of aborting
                // the export with a NullPointerException
                log.warn("Digital object " + id + " could not be found, no file is written for it.");
                continue;
            }
            if (object.isDataExistent()) {
                loadedSize += object.getData().getSize();
                File f = new File(aTempDir + object.getId() + ".xml");
                DigitalObject dataFilledObject = digitalObjectManager.getCopyOfDataFilledDigitalObject(object);
                writeBinaryData(id, new ByteArrayInputStream(dataFilledObject.getData().getData()), f);
            } else {
                skip++;
            }
        }
        // detach everything that was loaded and give the memory back
        em.clear();
        System.gc();
        log.info("Finished writing bytestreams of digital objects. Skipped empty objects: " + skip);
    }

    /**
     * Dumps binary data to provided file. It results in an XML file with a
     * single element: data. The element carries the object id as attribute
     * <code>id</code> and the base 64 encoded data as content.
     * <p>
     * The file is written in {@link PlanXMLConstants#ENCODING}, the same
     * encoding that is declared in its XML header, and is closed before this
     * method returns.
     * 
     * @param id
     *            id of the digital object, written to the id attribute
     * @param data
     *            the raw data to encode; the stream is read to its end but
     *            not closed
     * @param f
     *            the file to write
     * @throws IOException
     *             if the file could not be written or the XML could not be
     *             generated
     */
    private static void writeBinaryData(int id, InputStream data, File f) throws IOException {

        XMLOutputFactory factory = XMLOutputFactory.newInstance();
        // Buffered, because the XML writer may hand over very small chunks
        OutputStream fileOut = new BufferedOutputStream(new FileOutputStream(f));
        try {
            // Let the XML writer do the encoding, so that the bytes in the
            // file match the encoding declared in the XML header regardless
            // of the platform default charset.
            XMLStreamWriter writer = factory.createXMLStreamWriter(fileOut, PlanXMLConstants.ENCODING);
            try {
                writer.writeStartDocument(PlanXMLConstants.ENCODING, "1.0");
                writer.writeStartElement("data");
                writer.writeAttribute("id", "" + id);

                Base64InputStream base64EncodingIn = new Base64InputStream(data, true,
                        PlanXMLConstants.BASE64_LINE_LENGTH, PlanXMLConstants.BASE64_LINE_BREAK);

                OutputStream out = new WriterOutputStream(new XMLStreamContentWriter(writer),
                        PlanXMLConstants.ENCODING);
                // read the binary data and encode it on the fly
                IOUtils.copy(base64EncodingIn, out);
                out.flush();

                // all data is written - end
                writer.writeEndElement();
                writer.writeEndDocument();

                writer.flush();
            } finally {
                writer.close();
            }
        } catch (XMLStreamException e) {
            // report the failure to the caller instead of leaving a broken
            // file behind unnoticed
            throw new IOException("Could not write binary data of object " + id + " to " + f, e);
        } finally {
            // XMLStreamWriter.close() does not close the underlying stream
            fileOut.close();
        }
    }

    // -------- getter/setter --------
    /**
     * @return the currently stored export path, which is {@code null} until
     *         an export has been started or a path has been set
     */
    public String getLastProjectExportPath() {
        return this.lastProjectExportPath;
    }

    /**
     * Replaces the stored export path.
     * 
     * @param lastProjectExportPath
     *            the path to store; stored as given, without any check
     */
    public void setLastProjectExportPath(String lastProjectExportPath) {
        this.lastProjectExportPath = lastProjectExportPath;
    }

    // /**
    // * Adds all enlisted plans to an XML document, but does NOT write binary
    // data.
    // * Instead the Id's of all referenced uploads and sample records are added
    // to the provided lists,
    // * this way they can be added later.
    // *
    // * @param ppids
    // * @param uploadIDs
    // * @param recordIDs
    // * @return
    // */
    // public Document exportToXml(List<Integer> ppids, List<Integer> uploadIDs,
    // List<Integer> recordIDs) {
    // ProjectExporter exporter = new ProjectExporter();
    // Document doc = exporter.createProjectDoc();
    //
    // int i = 0;
    // for (Integer id: ppids) {
    // // load one plan after the other:
    // List<Plan> list = em.createQuery(
    // "select p from Plan p where p.planProperties.id = "
    // + id).getResultList();
    // if (list.size() != 1) {
    // FacesMessages.instance().add(FacesMessage.SEVERITY_ERROR,
    // "Skipping the export of the plan with properties"+id+": Couldnt load.");
    // } else {
    // //log.debug("adding project "+p.getplanProperties().getName()+" to XML...");
    // exporter.addProject(list.get(0), doc, uploadIDs, recordIDs);
    // }
    // list.clear();
    // list = null;
    //
    // log.info("XMLExport: addString destinationed project ppid="+id);
    // i++;
    // if ((i%10==0)) {
    // em.clear();
    // System.gc();
    // }
    // }
    // return doc;
    // }
}