Example usage for org.apache.pdfbox.pdmodel.common PDMetadata exportXMPMetadata

List of usage examples for org.apache.pdfbox.pdmodel.common PDMetadata exportXMPMetadata

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.common PDMetadata exportXMPMetadata.

Prototype

public InputStream exportXMPMetadata() throws IOException 

Source Link

Document

Extract the XMP metadata.

Usage

From source file:at.gv.egiz.pdfas.lib.impl.signing.pdfbox2.PADESPDFBOXSigner.java

License:EUPL

/**
 * Determines the PDF/A part number declared in the document's XMP metadata.
 *
 * <p>The XMP packet attached to the document catalog is parsed and its PDF/A
 * identification schema is inspected. The detected part and conformance level
 * are logged at info level.
 *
 * @param doc the document to inspect
 * @return the PDF/A part as a string (via {@code String.valueOf}), or
 *         {@code null} when the document has no catalog metadata, no PDF/A
 *         identification schema, no part value, or when anything fails
 */
private String getPDFAVersion(PDDocument doc) {
    try {
        PDDocumentCatalog cat = doc.getDocumentCatalog();
        PDMetadata metadata = cat.getMetadata();

        if (metadata != null) {
            DomXmpParser xmpParser = new DomXmpParser();
            XMPMetadata xmpMetadata;
            // The exported stream must be closed once parsed; the original code leaked it.
            try (java.io.InputStream xmpStream = metadata.exportXMPMetadata()) {
                xmpMetadata = xmpParser.parse(xmpStream);
            }
            if (xmpMetadata != null) {
                PDFAIdentificationSchema pdfaIdentificationSchema = xmpMetadata.getPDFIdentificationSchema();
                if (pdfaIdentificationSchema != null) {
                    Integer pdfaversion = pdfaIdentificationSchema.getPart();
                    String conformance = pdfaIdentificationSchema.getConformance();
                    logger.info("Detected PDF/A Version: {} - {}", pdfaversion, conformance);

                    if (pdfaversion != null) {
                        return String.valueOf(pdfaversion);
                    }
                }
            }
        }
    } catch (Throwable e) {
        // Detection is best-effort: every failure is logged and reported as "unknown" (null).
        // NOTE(review): Throwable also covers Errors; presumably deliberate - confirm before narrowing.
        logger.warn("Failed to determine PDF/A Version!", e);
    }
    return null;
}

From source file:org.apache.tika.parser.pdf.PDFPureJavaParser.java

License:Apache License

/**
 * Parses the XMP packet of the given PDF metadata object into a DOM document.
 *
 * <p>Failures are never propagated: a failure to export the XMP stream is
 * recorded on {@code metadata} as an embedded-stream exception, and a failure
 * while obtaining the builder or parsing is recorded as a regular exception.
 *
 * @param pdMetadata the PDF metadata to read; may be {@code null}
 * @param metadata   the metadata object on which failures are recorded
 * @param context    the parse context supplying the {@code DocumentBuilder}
 * @return the parsed DOM, or {@code null} if {@code pdMetadata} is
 *         {@code null} or any step failed
 */
private Document loadDOM(PDMetadata pdMetadata, Metadata metadata, ParseContext context) {
    if (pdMetadata == null) {
        return null;
    }

    // Step 1: obtain the raw XMP stream; nothing needs closing if this fails.
    InputStream xmpStream;
    try {
        xmpStream = pdMetadata.exportXMPMetadata();
    } catch (IOException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
        return null;
    }

    // Step 2: parse the stream, always releasing it afterwards.
    Document dom = null;
    try {
        DocumentBuilder builder = context.getDocumentBuilder();
        builder.setErrorHandler((ErrorHandler) null);
        dom = builder.parse(xmpStream);
    } catch (IOException | SAXException | TikaException e) {
        EmbeddedDocumentUtil.recordException(e, metadata);
    } finally {
        IOUtils.closeQuietly(xmpStream);
    }
    return dom;
}

From source file:org.exoplatform.services.document.impl.PDFDocumentReader.java

License:Open Source License

/**
 * Extracts Dublin Core style properties from a PDF document.
 *
 * <p>The XMP metadata of the document catalog is consulted first (Dublin Core,
 * PDF and Basic schemas). Only if that yields no property at all is the
 * document information dictionary used as a fallback. Each individual getter
 * is guarded so that one unreadable field does not prevent the others from
 * being collected; such failures are logged as warnings.
 *
 * <p>The whole extraction runs as a privileged action. The supplied stream is
 * always closed before this method returns, including when loading the
 * document fails.
 *
 * @param is the PDF content; must not be {@code null}
 * @return the collected properties; possibly empty, never {@code null}
 * @throws IOException              if reading or closing the document fails
 * @throws DocumentReadException    if the document is encrypted and cannot be
 *                                  decrypted with an empty password
 * @throws IllegalArgumentException if {@code is} is {@code null}
 */
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException {
    try {
        return SecurityHelper.doPrivilegedExceptionAction(new PrivilegedExceptionAction<Properties>() {
            public Properties run() throws Exception {
                if (is == null) {
                    throw new IllegalArgumentException("InputStream is null.");
                }

                Properties props = new Properties();
                PDDocument pdDocument = null;
                try {
                    // Load inside the try block so the finally clause closes the
                    // stream even when the document cannot be loaded.
                    pdDocument = PDDocument.load(is);

                    if (pdDocument.isEncrypted()) {
                        try {
                            // Only documents readable with an empty password are supported.
                            pdDocument.decrypt("");
                        } catch (InvalidPasswordException e) {
                            throw new DocumentReadException("The pdf document is encrypted.", e);
                        } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
                            throw new DocumentReadException(e.getMessage(), e);
                        }
                    }

                    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
                    PDMetadata meta = catalog.getMetadata();
                    if (meta != null) {
                        XMPMetadata metadata = meta.exportXMPMetadata();

                        XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
                        if (dc != null) {
                            try {
                                if (dc.getTitle() != null)
                                    props.put(DCMetaData.TITLE, fixEncoding(dc.getTitle()));
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            try {
                                if (dc.getDescription() != null)
                                    props.put(DCMetaData.DESCRIPTION, fixEncoding(dc.getDescription()));
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }

                            try {
                                if (dc.getCreators() != null) {
                                    // Every creator is stored under the same key, so the last one wins.
                                    for (String creator : dc.getCreators()) {
                                        props.put(DCMetaData.CREATOR, fixEncoding(creator));
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }

                            try {
                                if (dc.getDates() != null) {
                                    // Same key for every date: the last one wins.
                                    for (Calendar date : dc.getDates()) {
                                        props.put(DCMetaData.DATE, date);
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getDate failed: " + e.getMessage());
                            }
                        }

                        XMPSchemaPDF pdf = metadata.getPDFSchema();
                        if (pdf != null) {
                            try {
                                if (pdf.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, fixEncoding(pdf.getKeywords()));
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }

                            try {
                                if (pdf.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, fixEncoding(pdf.getProducer()));
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                        }

                        XMPSchemaBasic basic = metadata.getBasicSchema();
                        if (basic != null) {
                            // Both dates share DCMetaData.DATE; the modification date,
                            // when present, overrides the creation date.
                            try {
                                if (basic.getCreateDate() != null)
                                    props.put(DCMetaData.DATE, basic.getCreateDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (basic.getModifyDate() != null)
                                    props.put(DCMetaData.DATE, basic.getModifyDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }

                            // DCMetaData.PUBLISHER - basic.getCreatorTool()
                        }
                    }

                    if (props.isEmpty()) {
                        // The pdf doesn't contain any metadata, try to use the document
                        // information instead
                        PDDocumentInformation docInfo = pdDocument.getDocumentInformation();

                        if (docInfo != null) {
                            try {
                                if (docInfo.getAuthor() != null)
                                    props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor());
                            } catch (Exception e) {
                                LOG.warn("getAuthor failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getCreationDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreator() != null)
                                    props.put(DCMetaData.CREATOR, docInfo.getCreator());
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {

                                if (docInfo.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getModificationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getModificationDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getSubject() != null)
                                    props.put(DCMetaData.DESCRIPTION, docInfo.getSubject());
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getTitle() != null)
                                    props.put(DCMetaData.TITLE, docInfo.getTitle());
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }

                            // docInfo.getTrapped();
                        }
                    }
                } finally {
                    try {
                        if (pdDocument != null) {
                            pdDocument.close();
                        }
                    } finally {
                        // Nested finally: the stream is closed even if closing the document throws.
                        try {
                            is.close();
                        } catch (IOException e) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("An exception occurred: " + e.getMessage());
                            }
                        }
                    }
                }
                return props;
            }
        });

    } catch (PrivilegedActionException pae) {
        // Unwrap the privileged-action wrapper so callers see the declared exception types.
        Throwable cause = pae.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else if (cause instanceof DocumentReadException) {
            // Declared in the throws clause, so surface it as-is instead of
            // hiding it inside a RuntimeException.
            throw (DocumentReadException) cause;
        } else if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        } else {
            throw new RuntimeException(cause);
        }
    }
}

From source file:org.lockss.pdf.pdfbox.PdfBoxDocument.java

License:Open Source License

/**
 * Returns the XMP metadata of the wrapped PDF document as a DOM document.
 *
 * @return the XMP document, or {@code null} if the document catalog carries
 *         no metadata
 * @throws PdfException if an {@code IOException} occurs while reading the XMP data
 */
@Override
public Document getMetadataAsXmp() throws PdfException {
    Document xmpDom = null;
    try {
        PDMetadata catalogMetadata = pdDocument.getDocumentCatalog().getMetadata();
        if (catalogMetadata != null) {
            xmpDom = catalogMetadata.exportXMPMetadata().getXMPDocument();
        }
    } catch (IOException cause) {
        throw new PdfException("Error parsing XMP data", cause);
    }
    return xmpDom;
}

From source file:org.mustangproject.ZUGFeRD.MustangReaderWriterTest.java

License:Open Source License

/**
 * Asserts that the given PDF declares PDF/A part 3 with conformance level "U"
 * in its XMP metadata.
 *
 * @param tempFile the PDF file to check
 * @throws IOException              if the file or its XMP stream cannot be read
 * @throws InvalidPasswordException declared for loading the document
 * @throws IllegalStateException    if the XMP packet cannot be parsed
 */
private void checkPdfA3B(File tempFile) throws IOException, InvalidPasswordException {
    try (PDDocument doc = PDDocument.load(tempFile)) {
        PDMetadata metadata = doc.getDocumentCatalog().getMetadata();
        final XMPMetadata xmp;
        // Hand the stream to the parser directly: sizing a buffer with available()
        // and filling it with a single read() may truncate the XMP packet.
        try (InputStream xmpStream = metadata.exportXMPMetadata()) {
            xmp = new DomXmpParser().parse(xmpStream);
        }
        PDFAIdentificationSchema pdfaid = xmp.getPDFIdentificationSchema();
        assertEquals(pdfaid.getPart().intValue(), 3);
        assertEquals(pdfaid.getConformance(), "U");
    } catch (XmpParsingException e) {
        throw new IllegalStateException("Failed to read PDF", e);
    }
}