List of usage examples for org.apache.pdfbox.pdmodel PDDocumentCatalog getMetadata
public PDMetadata getMetadata()
From source file:PDFExtractMetadata.java
License:Apache License
/** * This is the main method.// w ww . j a va 2 s . c om * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. * @throws XmpParsingException */ public static void main(String[] args) throws IOException, XmpParsingException { if (args.length != 1) { usage(); System.exit(1); } else { PDDocument document = null; try { document = PDDocument.load(new File(args[0])); PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata meta = catalog.getMetadata(); if (meta != null) { DomXmpParser xmpParser = new DomXmpParser(); try { XMPMetadata metadata = xmpParser.parse(meta.createInputStream()); DublinCoreSchema dc = metadata.getDublinCoreSchema(); if (dc != null) { display("Title:", dc.getTitle()); display("Description:", dc.getDescription()); listString("Creators: ", dc.getCreators()); listCalendar("Dates:", dc.getDates()); listString("Subjects:", dc.getSubjects()); } AdobePDFSchema pdf = metadata.getAdobePDFSchema(); if (pdf != null) { display("Keywords:", pdf.getKeywords()); display("PDF Version:", pdf.getPDFVersion()); display("PDF Producer:", pdf.getProducer()); } XMPBasicSchema basic = metadata.getXMPBasicSchema(); if (basic != null) { display("Create Date:", basic.getCreateDate()); display("Modify Date:", basic.getModifyDate()); display("Creator Tool:", basic.getCreatorTool()); } } catch (XmpParsingException e) { System.err.println("An error ouccred when parsing the meta data: " + e.getMessage()); } } else { // The pdf doesn't contain any metadata, try to use the // document information instead PDDocumentInformation information = document.getDocumentInformation(); if (information != null) { showDocumentInformation(information); } } } finally { if (document != null) { document.close(); } } } }
From source file:at.gv.egiz.pdfas.lib.impl.signing.pdfbox2.PADESPDFBOXSigner.java
License:EUPL
/**
 * Determines the PDF/A part number declared in the document's XMP metadata.
 *
 * @param doc the document to inspect
 * @return the PDF/A part as a string (e.g. "1"), or {@code null} when the
 *         document carries no PDF/A identification or the metadata cannot
 *         be read
 */
private String getPDFAVersion(PDDocument doc) {
    try {
        PDMetadata metadata = doc.getDocumentCatalog().getMetadata();
        if (metadata == null) {
            return null;
        }
        XMPMetadata xmpMetadata = new DomXmpParser().parse(metadata.exportXMPMetadata());
        if (xmpMetadata == null) {
            return null;
        }
        PDFAIdentificationSchema pdfaIdentificationSchema = xmpMetadata.getPDFIdentificationSchema();
        if (pdfaIdentificationSchema == null) {
            return null;
        }
        Integer pdfaversion = pdfaIdentificationSchema.getPart();
        String conformance = pdfaIdentificationSchema.getConformance();
        logger.info("Detected PDF/A Version: {} - {}", pdfaversion, conformance);
        if (pdfaversion != null) {
            return String.valueOf(pdfaversion);
        }
    } catch (Throwable e) {
        // Deliberately broad catch: version detection is best-effort and
        // must never abort the caller.
        logger.warn("Failed to determine PDF/A Version!", e);
    }
    return null;
}
From source file:com.wintindustries.pdffilter.pdfcore.PDFTester.java
/**
 * Dumps the document information dictionary and, when present, the raw XMP
 * metadata stream of the given document to standard out.
 *
 * @param document the (already loaded) document to describe
 * @throws IOException if the metadata stream cannot be read
 */
static public void printMetadata(PDDocument document) throws IOException {
    PDDocumentInformation docInfo = document.getDocumentInformation();
    PDMetadata xmpStream = document.getDocumentCatalog().getMetadata();

    System.out.println("Page Count=" + document.getNumberOfPages());
    System.out.println("Title=" + docInfo.getTitle());
    System.out.println("Author=" + docInfo.getAuthor());
    System.out.println("Subject=" + docInfo.getSubject());
    System.out.println("Keywords=" + docInfo.getKeywords());
    System.out.println("Creator=" + docInfo.getCreator());
    System.out.println("Producer=" + docInfo.getProducer());
    System.out.println("Creation Date=" + formatDate(docInfo.getCreationDate()));
    System.out.println("Modification Date=" + formatDate(docInfo.getModificationDate()));
    System.out.println("Trapped=" + docInfo.getTrapped());
    if (xmpStream != null) {
        System.out.println("Metadata=" + xmpStream.getInputStreamAsString());
    }
}
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * @return empty Optional if no metadata has been found */// ww w . j av a2 s. co m private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); if (metaRaw == null) { return Optional.empty(); } Document parseResult; try (InputStream is = metaRaw.createInputStream()) { parseResult = XMLUtil.parse(is); } XMPMetadata meta = new XMPMetadata(parseResult); meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); return Optional.of(meta); }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntries as DublinCore XMP Schemas * * Existing DublinCore schemas in the document are removed * * @param document/* w ww .j a v a2s .c o m*/ * The pdf document to write to. * @param entries * The BibTeX entries that are written as schemas * @param database * maybenull An optional database which the given BibTeX entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @throws IOException * @throws TransformerException */ private static void writeDublinCore(PDDocument document, Collection<BibEntry> entries, BibDatabase database) throws IOException, TransformerException { Collection<BibEntry> resolvedEntries; if (database == null) { resolvedEntries = entries; } else { resolvedEntries = database.resolveForStrings(entries, false); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw == null) { meta = new XMPMetadata(); } else { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } // Remove all current Dublin-Core schemas List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { schema.getElement().getParentNode().removeChild(schema.getElement()); } for (BibEntry entry : resolvedEntries) { XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta); XMPUtil.writeToDCSchema(dcSchema, entry, null); meta.addSchema(dcSchema); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the XMP-stream of the given * PDF-file./*from w ww. j av a2 s . c o m*/ * * Throws an IOException if the file cannot be read or written, so the user * can remove a lock or cancel the operation. * * The method will overwrite existing BibTeX-XMP-data, but keep other * existing metadata. * * @param file * The file to write the entries to. * @param bibtexEntries * The entries to write to the file. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @param writePDFInfo * Write information also in PDF document properties * @throws TransformerException * If the entry was malformed or unsupported. * @throws IOException * If the file could not be written to or could not be found. */ public static void writeXMP(File file, Collection<BibEntry> bibtexEntries, BibDatabase database, boolean writePDFInfo) throws IOException, TransformerException { Collection<BibEntry> resolvedEntries; if (database == null) { resolvedEntries = bibtexEntries; } else { resolvedEntries = database.resolveForStrings(bibtexEntries, false); } try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) { if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document."); } if (writePDFInfo && (resolvedEntries.size() == 1)) { XMPUtil.writeDocumentInformation(document, resolvedEntries.iterator().next(), null); XMPUtil.writeDublinCore(document, resolvedEntries, null); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw == null) { meta = new XMPMetadata(); } else { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); // Remove all current Bibtex-schemas List<XMPSchema> schemas = 
meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; bib.getElement().getParentNode().removeChild(bib.getElement()); } for (BibEntry e : resolvedEntries) { XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta); meta.addSchema(bibtex); bibtex.setBibtexEntry(e, null); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); // Save try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage()); } } }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/**
 * Reads the XMP metadata of the given document, registering the bibtex
 * namespace mapping.
 *
 * @return the parsed metadata, or {@code null} when the document has none
 */
private static XMPMetadata getXMPMetadata(PDDocument document) throws IOException {
    PDMetadata raw = document.getDocumentCatalog().getMetadata();
    if (raw == null) {
        return null;
    }
    XMPMetadata result = new XMPMetadata(XMLUtil.parse(raw.createInputStream()));
    result.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return result;
}
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntries as DublinCore XMP Schemas * //from ww w . ja v a 2s . c om * Existing DublinCore schemas in the document are removed * * @param document * The pdf document to write to. * @param entries * The Bibtex entries that are written as schemas * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @throws IOException * @throws TransformerException */ @SuppressWarnings("unchecked") private static void writeDublinCore(PDDocument document, Collection<BibtexEntry> entries, BibtexDatabase database) throws IOException, TransformerException { if (database != null) { entries = database.resolveForStrings(entries, false); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } // Remove all current Dublin-Core schemas List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { schema.getElement().getParentNode().removeChild(schema.getElement()); } for (BibtexEntry entry : entries) { XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta); XMPUtil.writeToDCSchema(dcSchema, entry, null); meta.addSchema(dcSchema); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the XMP-stream of the given * PDF-file./*ww w . ja v a 2 s. c om*/ * * Throws an IOException if the file cannot be read or written, so the user * can remove a lock or cancel the operation. * * The method will overwrite existing BibTeX-XMP-data, but keep other * existing metadata. * * @param file * The file to write the entries to. * @param bibtexEntries * The entries to write to the file. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @param writePDFInfo * Write information also in PDF document properties * @throws TransformerException * If the entry was malformed or unsupported. * @throws IOException * If the file could not be written to or could not be found. */ @SuppressWarnings("unchecked") public static void writeXMP(File file, Collection<BibtexEntry> bibtexEntries, BibtexDatabase database, boolean writePDFInfo) throws IOException, TransformerException { if (database != null) { bibtexEntries = database.resolveForStrings(bibtexEntries, false); } PDDocument document = null; try { document = PDDocument.load(file.getAbsoluteFile()); if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document."); } if (writePDFInfo && (bibtexEntries.size() == 1)) { XMPUtil.writeDocumentInformation(document, bibtexEntries.iterator().next(), null); XMPUtil.writeDublinCore(document, bibtexEntries, null); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); // Remove all current Bibtex-schemas List<XMPSchema> schemas = 
meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; bib.getElement().getParentNode().removeChild(bib.getElement()); } for (BibtexEntry e : bibtexEntries) { XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta); meta.addSchema(bibtex); bibtex.setBibtexEntry(e, null); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); // Save try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage()); } } finally { if (document != null) { document.close(); } } }
From source file:org.exoplatform.services.document.impl.PDFDocumentReader.java
License:Open Source License
/**
 * Extracts document properties (Dublin-Core-style keys) from a PDF stream.
 *
 * Prefers the document's XMP metadata; when that yields nothing, falls back
 * to the PDF information dictionary. Each individual field is read inside
 * its own try/catch so one bad field never aborts the whole extraction.
 * Both the loaded document and the input stream are closed before return.
 *
 * @param is the PDF content to read; must not be null
 * @return the extracted properties (possibly empty)
 * @throws IOException if the stream cannot be parsed as a PDF
 * @throws DocumentReadException if the document is encrypted and cannot be
 *         decrypted with an empty password
 */
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException {
    try {
        // Runs under doPrivileged so extraction works even when callers have
        // restricted permissions; checked exceptions surface as
        // PrivilegedActionException and are unwrapped below.
        return SecurityHelper.doPrivilegedExceptionAction(new PrivilegedExceptionAction<Properties>() {
            public Properties run() throws Exception {
                if (is == null) {
                    throw new IllegalArgumentException("InputStream is null.");
                }
                PDDocument pdDocument = PDDocument.load(is);
                Properties props = new Properties();
                try {
                    if (pdDocument.isEncrypted()) {
                        // Attempt decryption with an empty owner/user password;
                        // anything else is reported as unreadable.
                        try {
                            pdDocument.decrypt("");
                        } catch (InvalidPasswordException e) {
                            throw new DocumentReadException("The pdf document is encrypted.", e);
                        } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
                            throw new DocumentReadException(e.getMessage(), e);
                        }
                    }
                    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
                    PDMetadata meta = catalog.getMetadata();
                    if (meta != null) {
                        XMPMetadata metadata = meta.exportXMPMetadata();
                        // Dublin Core schema: title, description, creators, dates.
                        // fixEncoding is a project helper; presumably it repairs
                        // mis-encoded text — TODO confirm its contract.
                        XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
                        if (dc != null) {
                            try {
                                if (dc.getTitle() != null)
                                    props.put(DCMetaData.TITLE, fixEncoding(dc.getTitle()));
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            try {
                                if (dc.getDescription() != null)
                                    props.put(DCMetaData.DESCRIPTION, fixEncoding(dc.getDescription()));
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                // Properties is a map: with several creators,
                                // only the last one survives.
                                if (dc.getCreators() != null) {
                                    for (String creator : dc.getCreators()) {
                                        props.put(DCMetaData.CREATOR, fixEncoding(creator));
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {
                                // Same last-wins behaviour for multiple dates.
                                if (dc.getDates() != null) {
                                    for (Calendar date : dc.getDates()) {
                                        props.put(DCMetaData.DATE, date);
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getDate failed: " + e.getMessage());
                            }
                        }
                        // Adobe PDF schema: keywords -> SUBJECT, producer -> PUBLISHER.
                        XMPSchemaPDF pdf = metadata.getPDFSchema();
                        if (pdf != null) {
                            try {
                                if (pdf.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, fixEncoding(pdf.getKeywords()));
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (pdf.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, fixEncoding(pdf.getProducer()));
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                        }
                        // XMP Basic schema: create/modify dates both map to DATE,
                        // so modify date overwrites create date when both exist.
                        XMPSchemaBasic basic = metadata.getBasicSchema();
                        if (basic != null) {
                            try {
                                if (basic.getCreateDate() != null)
                                    props.put(DCMetaData.DATE, basic.getCreateDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (basic.getModifyDate() != null)
                                    props.put(DCMetaData.DATE, basic.getModifyDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            // DCMetaData.PUBLISHER - basic.getCreatorTool()
                        }
                    }
                    if (props.isEmpty()) {
                        // The pdf doesn't contain any metadata, try to use the document
                        // information instead
                        PDDocumentInformation docInfo = pdDocument.getDocumentInformation();
                        if (docInfo != null) {
                            try {
                                if (docInfo.getAuthor() != null)
                                    props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor());
                            } catch (Exception e) {
                                LOG.warn("getAuthor failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getCreationDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreator() != null)
                                    props.put(DCMetaData.CREATOR, docInfo.getCreator());
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getModificationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getModificationDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getSubject() != null)
                                    props.put(DCMetaData.DESCRIPTION, docInfo.getSubject());
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getTitle() != null)
                                    props.put(DCMetaData.TITLE, docInfo.getTitle());
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            // docInfo.getTrapped();
                        }
                    }
                } finally {
                    // Close the document first, then the caller's stream; a
                    // failure closing the stream is only traced, not rethrown.
                    if (pdDocument != null) {
                        pdDocument.close();
                    }
                    if (is != null) {
                        try {
                            is.close();
                        } catch (IOException e) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("An exception occurred: " + e.getMessage());
                            }
                        }
                    }
                }
                return props;
            }
        });
    } catch (PrivilegedActionException pae) {
        // Unwrap the privileged-action wrapper, preserving the original
        // exception type where the signature allows it.
        Throwable cause = pae.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        } else {
            throw new RuntimeException(cause);
        }
    }
}