org.lockss.pdf.pdfbox.PdfBoxDocument.java Source code

Java tutorial

Introduction

Here is the source code for org.lockss.pdf.pdfbox.PdfBoxDocument.java

Source

/*
 * $Id$
 */

/*
    
Copyright (c) 2000-2016 Board of Trustees of Leland Stanford Jr. University,
all rights reserved.
    
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
    
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
    
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    
Except as contained in this notice, the name of Stanford University shall not
be used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from Stanford University.
    
*/

package org.lockss.pdf.pdfbox;

import java.io.*;
import java.util.*;

import javax.xml.transform.TransformerException;

import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.lockss.pdf.*;
import org.lockss.pdf.PdfDocument;
import org.lockss.pdf.PdfPage;
import org.lockss.util.*;
import org.w3c.dom.Document;

/**
 * <p>
 * A {@link PdfDocument} implementation based on PDFBox 1.6.0.
 * </p>
 * <p>
 * This class acts as an adapter for the {@link PDDocument} class.
 * </p>
 * <p>
 * The logger in this class is used to record a few messages at
 * {@link Logger#LEVEL_WARNING} level if certain assumptions about the
 * state of this PDF document are violated. Other logging messages are
 * at {@link Logger#LEVEL_DEBUG2} or finer.
 * </p>
 * <ul>
 * <li>The finalizer ({@link Object#finalize()}) records instances
 * that are garbage-collected without having been explicitly closed,
 * and provides a stack trace context of when the document was
 * created.</li>
 * <li>The ISO-8859-1 encoding is guaranteed to exist in Java, but
 * should an {@link UnsupportedEncodingException} arise, a message is
 * recorded at {@link Logger#LEVEL_WARNING} level.</li>
 * </ul>
 * 
 * @author Thib Guicherd-Callin
 * @since 1.56
 * @see PdfBoxDocumentFactory
 */
public class PdfBoxDocument implements PdfDocument {

    /**
     * <p>
     * Logger for use by this class.
     * </p>
     * @since 1.56
     */
    private static final Logger log = Logger.getLogger(PdfBoxDocument.class);

    /**
     * <p>
     * The PDF document factory instance that created this PDF document
     * instance. This is <code>null</code> when the deprecated
     * single-argument constructor was used.
     * </p>
     * 
     * @since 1.70
     */
    protected PdfBoxDocumentFactory pdfBoxDocumentFactory;

    /**
     * <p>
     * The {@link PDDocument} instance this instance represents.
     * </p>
     * 
     * @since 1.56
     */
    protected final PDDocument pdDocument;

    /**
     * <p>
     * Whether this instance has been closed. Set by {@link #close()} and
     * read by {@link #save(OutputStream)} and the finalizer.
     * </p>
     * 
     * @since 1.56
     */
    private volatile boolean closed;

    /**
     * <p>
     * String representation of the context when the document was
     * created: one stack frame per line, each line preceded by a
     * newline, with no trailing newline.
     * </p>
     */
    private final String openStackTrace;

    /**
     * <p>
     * Constructor. <b>Deprecated in 1.70.</b>
     * </p>
     * <p>
     * This constructor records a <code>null</code> document factory, so
     * {@link #getPage(int)} and {@link #getPages()}, which go through
     * {@link #getDocumentFactory()}, cannot work on such an instance
     * unless a subclass supplies a factory by other means.
     * </p>
     * 
     * @param pdDocument
     *          The {@link PDDocument} instance underpinning this PDF document
     * @since 1.56
     * @deprecated Deprecated since 1.70. Use
     *             {@link #PdfBoxDocument(PdfBoxDocumentFactory, PDDocument)}
     *             instead.
     */
    @Deprecated
    public PdfBoxDocument(PDDocument pdDocument) {
        this(null, pdDocument);
    }

    /**
     * <p>
     * Constructor.
     * </p>
     * <p>
     * Also captures the current thread's stack trace so that the
     * finalizer can report where a never-closed document was created.
     * </p>
     * 
     * @param pdfBoxDocumentFactory The PDF document factory instance
     *          that is creating this PDF document
     * @param pdDocument The {@link PDDocument} instance underpinning
     *          this PDF document
     * @since 1.70
     */
    public PdfBoxDocument(PdfBoxDocumentFactory pdfBoxDocumentFactory, PDDocument pdDocument) {
        this.pdfBoxDocumentFactory = pdfBoxDocumentFactory;
        this.pdDocument = pdDocument;
        this.closed = false;

        StringBuilder sb = new StringBuilder();
        // The first frame is the getStackTrace() call itself, so skip it.
        // An explicit flag is needed: testing sb.length() == 0 would skip
        // every frame, because nothing is appended before the skip.
        boolean firstFrameSkipped = false;
        for (StackTraceElement e : Thread.currentThread().getStackTrace()) {
            if (!firstFrameSkipped) {
                firstFrameSkipped = true;
                continue;
            }
            sb.append('\n'); // Intentional, see finalize()
            sb.append(e.toString());
        }
        this.openStackTrace = sb.toString();
    }

    /**
     * {@inheritDoc}
     * <p>
     * The instance is marked as closed before the underlying
     * {@link PDDocument} is closed, so it is considered closed even if
     * the underlying close fails.
     * </p>
     * 
     * @throws PdfException if closing the underlying document throws an
     *           {@link IOException}
     */
    @Override
    public void close() throws PdfException {
        try {
            log.debug2("Closing PDF document explicitly");
            closed = true;
            pdDocument.close();
        } catch (IOException ioe) {
            log.debug2("Exception closing PDF document explicitly", ioe);
            throw new PdfException(ioe);
        }
    }

    @Override
    public String getAuthor() {
        return pdDocument.getDocumentInformation().getAuthor();
    }

    @Override
    public Calendar getCreationDate() throws PdfException {
        try {
            return pdDocument.getDocumentInformation().getCreationDate();
        } catch (IOException ioe) {
            throw new PdfException("Error processing the creation date", ioe);
        }
    }

    @Override
    public String getCreator() {
        return pdDocument.getDocumentInformation().getCreator();
    }

    @Override
    public PdfBoxDocumentFactory getDocumentFactory() {
        return pdfBoxDocumentFactory;
    }

    @Override
    public String getKeywords() {
        return pdDocument.getDocumentInformation().getKeywords();
    }

    @Override
    public String getLanguage() {
        return pdDocument.getDocumentCatalog().getLanguage();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns <code>null</code> if the document catalog has no metadata
     * stream.
     * </p>
     */
    @Override
    public String getMetadata() throws PdfException {
        try {
            PDMetadata metadata = pdDocument.getDocumentCatalog().getMetadata();
            if (metadata == null) {
                return null;
            }
            return metadata.getInputStreamAsString();
        } catch (IOException ioe) {
            throw new PdfException("Error converting metadata stream to string", ioe);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns <code>null</code> if the document catalog has no metadata
     * stream.
     * </p>
     */
    @Override
    public Document getMetadataAsXmp() throws PdfException {
        try {
            PDMetadata metadata = pdDocument.getDocumentCatalog().getMetadata();
            if (metadata == null) {
                return null;
            }
            return metadata.exportXMPMetadata().getXMPDocument();
        } catch (IOException ioe) {
            throw new PdfException("Error parsing XMP data", ioe);
        }
    }

    @Override
    public Calendar getModificationDate() throws PdfException {
        try {
            return pdDocument.getDocumentInformation().getModificationDate();
        } catch (IOException ioe) {
            throw new PdfException("Error processing the modification date", ioe);
        }
    }

    @Override
    public int getNumberOfPages() {
        return pdDocument.getNumberOfPages();
    }

    /**
     * {@inheritDoc}
     * <p>
     * The page object is wrapped by the document factory returned by
     * {@link #getDocumentFactory()}.
     * </p>
     */
    @Override
    public PdfPage getPage(int index) throws PdfException {
        /*
         * IMPLEMENTATION NOTE
         * 
         * The documentation of getAllPages() (PDFBox 1.6.0:
         * PDDocumentCatalog line 205) states that all the elements in the
         * returned list are of type PDPage.
         */
        return getDocumentFactory().makePage(this,
                /*(PDPage)*/pdDocument.getDocumentCatalog().getAllPages().get(index));
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns a new list; each page object is wrapped by the document
     * factory returned by {@link #getDocumentFactory()}.
     * </p>
     */
    @Override
    public List<PdfPage> getPages() throws PdfException {
        /*
         * IMPLEMENTATION NOTE
         * 
         * The documentation of getAllPages() (PDFBox 1.6.0:
         * PDDocumentCatalog line 205) states that all the elements in the
         * returned list are of type PDPage.
         */
        List<?> pdPages = pdDocument.getDocumentCatalog().getAllPages();
        List<PdfPage> ret = new ArrayList<PdfPage>(pdPages.size());
        for (Object obj : pdPages) {
            ret.add(getDocumentFactory().makePage(this, /*(PDPage)*/obj));
        }
        return ret;
    }

    @Override
    public String getProducer() {
        return pdDocument.getDocumentInformation().getProducer();
    }

    @Override
    public String getSubject() {
        return pdDocument.getDocumentInformation().getSubject();
    }

    @Override
    public String getTitle() {
        return pdDocument.getDocumentInformation().getTitle();
    }

    /**
     * {@inheritDoc}
     * <p>
     * If the underlying document has no trailer, the mapping is built
     * from a fresh, empty dictionary.
     * </p>
     */
    @Override
    public Map<String, PdfToken> getTrailer() {
        COSDictionary trailer = pdDocument.getDocument().getTrailer();
        if (trailer == null) {
            trailer = new COSDictionary();
        }
        return PdfBoxTokens.getDictionary(trailer);
    }

    @Override
    public void removePage(int index) {
        pdDocument.removePage(index);
    }

    /**
     * {@inheritDoc}
     * 
     * @throws PdfException if this document has already been closed, or
     *           if PDFBox reports a {@link COSVisitorException} while
     *           saving
     */
    @Override
    public void save(OutputStream outputStream) throws IOException, PdfException {
        if (closed) {
            throw new PdfException("PDF document already closed");
        }
        try {
            pdDocument.save(outputStream);
        } catch (COSVisitorException cve) {
            log.debug2("Error saving PDF document", cve);
            throw new PdfException("Error saving PDF document", cve);
        }
    }

    @Override
    public void setAuthor(String author) {
        pdDocument.getDocumentInformation().setAuthor(author);
    }

    @Override
    public void setCreationDate(Calendar date) {
        pdDocument.getDocumentInformation().setCreationDate(date);
    }

    @Override
    public void setCreator(String creator) {
        pdDocument.getDocumentInformation().setCreator(creator);
    }

    @Override
    public void setKeywords(String keywords) {
        pdDocument.getDocumentInformation().setKeywords(keywords);
    }

    @Override
    public void setLanguage(String language) {
        pdDocument.getDocumentCatalog().setLanguage(language);
    }

    /**
     * {@inheritDoc}
     * <p>
     * The string is encoded as ISO-8859-1 and installed as a new
     * metadata stream on the document catalog. The argument must not be
     * <code>null</code>; use {@link #unsetMetadata()} to remove the
     * metadata.
     * </p>
     */
    @Override
    public void setMetadata(String metadata) throws PdfException {
        /*
         * IMPLEMENTATION NOTE
         * 
         * getInputStreamAsString() (PDFBox 1.6.0: PDStream line 496) uses
         * the encoding ISO-8859-1, so we need to encode the string
         * accordingly. If it defined a constant, we could use it, but it
         * hard-codes the string "ISO-8859-1".
         */
        try {
            InputStream is = new ByteArrayInputStream(metadata.getBytes(Constants.ENCODING_ISO_8859_1));
            pdDocument.getDocumentCatalog().setMetadata(new PDMetadata(pdDocument, is, false));
        } catch (UnsupportedEncodingException uee) {
            // Shouldn't happen, ISO-8859-1 is guaranteed to exist
            log.warning("Unexpected unsupported encoding exception: " + Constants.ENCODING_ISO_8859_1, uee);
            throw new PdfException("Unexpected error converting metadata string to stream", uee);
        } catch (IOException ioe) {
            throw new PdfException("Error converting metadata string to stream", ioe);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * If the document catalog has no metadata stream yet, a new one is
     * created, and attached to the catalog only once the XMP data has
     * been imported successfully.
     * </p>
     */
    @Override
    public void setMetadataFromXmp(Document xmpDocument) throws PdfException {
        try {
            PDMetadata metadata = pdDocument.getDocumentCatalog().getMetadata();
            // getMetadata() returns null for documents without a metadata
            // stream (see getMetadata() and getMetadataAsXmp()).
            boolean created = (metadata == null);
            if (created) {
                metadata = new PDMetadata(pdDocument);
            }
            metadata.importXMPMetadata(new XMPMetadata(xmpDocument));
            if (created) {
                // Attach only after a successful import, so a failure does
                // not leave an empty metadata stream behind.
                pdDocument.getDocumentCatalog().setMetadata(metadata);
            }
        } catch (IOException ioe) {
            throw new PdfException("Error converting XMP document to metadata", ioe);
        } catch (TransformerException te) {
            throw new PdfException("Error converting XMP document to metadata", te);
        }
    }

    @Override
    public void setModificationDate(Calendar date) {
        pdDocument.getDocumentInformation().setModificationDate(date);
    }

    @Override
    public void setProducer(String producer) {
        pdDocument.getDocumentInformation().setProducer(producer);
    }

    @Override
    public void setSubject(String subject) {
        pdDocument.getDocumentInformation().setSubject(subject);
    }

    @Override
    public void setTitle(String title) {
        pdDocument.getDocumentInformation().setTitle(title);
    }

    @Override
    public void setTrailer(Map<String, PdfToken> trailerMapping) {
        pdDocument.getDocument().setTrailer(PdfBoxTokens.asCOSDictionary(trailerMapping));
    }

    @Override
    public void unsetAuthor() {
        pdDocument.getDocumentInformation().setAuthor(null);
    }

    @Override
    public void unsetCreationDate() {
        pdDocument.getDocumentInformation().setCreationDate(null);
    }

    @Override
    public void unsetCreator() {
        pdDocument.getDocumentInformation().setCreator(null);
    }

    @Override
    public void unsetKeywords() {
        pdDocument.getDocumentInformation().setKeywords(null);
    }

    @Override
    public void unsetLanguage() {
        pdDocument.getDocumentCatalog().setLanguage(null);
    }

    @Override
    public void unsetMetadata() {
        pdDocument.getDocumentCatalog().setMetadata(null);
    }

    @Override
    public void unsetModificationDate() {
        pdDocument.getDocumentInformation().setModificationDate(null);
    }

    @Override
    public void unsetProducer() {
        pdDocument.getDocumentInformation().setProducer(null);
    }

    @Override
    public void unsetSubject() {
        pdDocument.getDocumentInformation().setSubject(null);
    }

    @Override
    public void unsetTitle() {
        pdDocument.getDocumentInformation().setTitle(null);
    }

    /**
     * <p>
     * Safety net for documents that were never explicitly closed: logs a
     * warning that includes the creation context captured by the
     * constructor, then closes the underlying {@link PDDocument}. Any
     * exception raised while doing so is logged at
     * {@link Logger#LEVEL_DEBUG2} level and not rethrown.
     * </p>
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            if (!closed) {
                // Starts with newline, doesn't end with one, see constructor
                log.warning("Closing PDF document implicitly in finalizer; creation context:" + openStackTrace);
                pdDocument.close();
            }
        } catch (Exception exc) {
            log.debug2("Exception closing PDF document implicitly in finalizer", exc);
            // Don't rethrow
        } finally {
            super.finalize();
        }
    }

}