List of usage examples for org.apache.pdfbox.pdmodel PDDocumentCatalog getMetadata
public PDMetadata getMetadata()
From source file:PDFExtractMetadata.java
License:Apache License
/** * This is the main method.// w ww . j a va 2 s . c om * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. * @throws XmpParsingException */ public static void main(String[] args) throws IOException, XmpParsingException { if (args.length != 1) { usage(); System.exit(1); } else { PDDocument document = null; try { document = PDDocument.load(new File(args[0])); PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata meta = catalog.getMetadata(); if (meta != null) { DomXmpParser xmpParser = new DomXmpParser(); try { XMPMetadata metadata = xmpParser.parse(meta.createInputStream()); DublinCoreSchema dc = metadata.getDublinCoreSchema(); if (dc != null) { display("Title:", dc.getTitle()); display("Description:", dc.getDescription()); listString("Creators: ", dc.getCreators()); listCalendar("Dates:", dc.getDates()); listString("Subjects:", dc.getSubjects()); } AdobePDFSchema pdf = metadata.getAdobePDFSchema(); if (pdf != null) { display("Keywords:", pdf.getKeywords()); display("PDF Version:", pdf.getPDFVersion()); display("PDF Producer:", pdf.getProducer()); } XMPBasicSchema basic = metadata.getXMPBasicSchema(); if (basic != null) { display("Create Date:", basic.getCreateDate()); display("Modify Date:", basic.getModifyDate()); display("Creator Tool:", basic.getCreatorTool()); } } catch (XmpParsingException e) { System.err.println("An error ouccred when parsing the meta data: " + e.getMessage()); } } else { // The pdf doesn't contain any metadata, try to use the // document information instead PDDocumentInformation information = document.getDocumentInformation(); if (information != null) { showDocumentInformation(information); } } } finally { if (document != null) { document.close(); } } } }
From source file:at.gv.egiz.pdfas.lib.impl.signing.pdfbox2.PADESPDFBOXSigner.java
License:EUPL
/**
 * Determines the PDF/A part number declared in the document's XMP metadata.
 *
 * @param doc the document to inspect
 * @return the PDF/A part as a string (e.g. "1"), or {@code null} when the
 *         document carries no PDF/A identification or the metadata cannot
 *         be read
 */
private String getPDFAVersion(PDDocument doc) {
    try {
        PDMetadata metadata = doc.getDocumentCatalog().getMetadata();
        if (metadata == null) {
            return null;
        }
        XMPMetadata xmpMetadata = new DomXmpParser().parse(metadata.exportXMPMetadata());
        if (xmpMetadata == null) {
            return null;
        }
        PDFAIdentificationSchema pdfaIdentificationSchema = xmpMetadata.getPDFIdentificationSchema();
        if (pdfaIdentificationSchema == null) {
            return null;
        }
        Integer pdfaversion = pdfaIdentificationSchema.getPart();
        String conformance = pdfaIdentificationSchema.getConformance();
        logger.info("Detected PDF/A Version: {} - {}", pdfaversion, conformance);
        if (pdfaversion != null) {
            return String.valueOf(pdfaversion);
        }
    } catch (Throwable e) {
        // Deliberately broad catch: version detection is best-effort and
        // must never abort the caller.
        logger.warn("Failed to determine PDF/A Version!", e);
    }
    return null;
}
From source file:com.wintindustries.pdffilter.pdfcore.PDFTester.java
/**
 * Dumps the document information dictionary and, when present, the raw XMP
 * metadata stream of the given document to standard out.
 *
 * @param document the (already loaded) document to describe
 * @throws IOException if the metadata stream cannot be read
 */
static public void printMetadata(PDDocument document) throws IOException {
    PDDocumentInformation docInfo = document.getDocumentInformation();
    PDMetadata xmpStream = document.getDocumentCatalog().getMetadata();

    System.out.println("Page Count=" + document.getNumberOfPages());
    System.out.println("Title=" + docInfo.getTitle());
    System.out.println("Author=" + docInfo.getAuthor());
    System.out.println("Subject=" + docInfo.getSubject());
    System.out.println("Keywords=" + docInfo.getKeywords());
    System.out.println("Creator=" + docInfo.getCreator());
    System.out.println("Producer=" + docInfo.getProducer());
    System.out.println("Creation Date=" + formatDate(docInfo.getCreationDate()));
    System.out.println("Modification Date=" + formatDate(docInfo.getModificationDate()));
    System.out.println("Trapped=" + docInfo.getTrapped());
    if (xmpStream != null) {
        System.out.println("Metadata=" + xmpStream.getInputStreamAsString());
    }
}
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * @return empty Optional if no metadata has been found */// ww w . j av a2 s. co m private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); if (metaRaw == null) { return Optional.empty(); } Document parseResult; try (InputStream is = metaRaw.createInputStream()) { parseResult = XMLUtil.parse(is); } XMPMetadata meta = new XMPMetadata(parseResult); meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); return Optional.of(meta); }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntries as DublinCore XMP Schemas * * Existing DublinCore schemas in the document are removed * * @param document/* w ww .j a v a2s .c o m*/ * The pdf document to write to. * @param entries * The BibTeX entries that are written as schemas * @param database * maybenull An optional database which the given BibTeX entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @throws IOException * @throws TransformerException */ private static void writeDublinCore(PDDocument document, Collection<BibEntry> entries, BibDatabase database) throws IOException, TransformerException { Collection<BibEntry> resolvedEntries; if (database == null) { resolvedEntries = entries; } else { resolvedEntries = database.resolveForStrings(entries, false); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw == null) { meta = new XMPMetadata(); } else { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } // Remove all current Dublin-Core schemas List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { schema.getElement().getParentNode().removeChild(schema.getElement()); } for (BibEntry entry : resolvedEntries) { XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta); XMPUtil.writeToDCSchema(dcSchema, entry, null); meta.addSchema(dcSchema); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the XMP-stream of the given * PDF-file./*from w ww. j av a2 s . c o m*/ * * Throws an IOException if the file cannot be read or written, so the user * can remove a lock or cancel the operation. * * The method will overwrite existing BibTeX-XMP-data, but keep other * existing metadata. * * @param file * The file to write the entries to. * @param bibtexEntries * The entries to write to the file. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @param writePDFInfo * Write information also in PDF document properties * @throws TransformerException * If the entry was malformed or unsupported. * @throws IOException * If the file could not be written to or could not be found. */ public static void writeXMP(File file, Collection<BibEntry> bibtexEntries, BibDatabase database, boolean writePDFInfo) throws IOException, TransformerException { Collection<BibEntry> resolvedEntries; if (database == null) { resolvedEntries = bibtexEntries; } else { resolvedEntries = database.resolveForStrings(bibtexEntries, false); } try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) { if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document."); } if (writePDFInfo && (resolvedEntries.size() == 1)) { XMPUtil.writeDocumentInformation(document, resolvedEntries.iterator().next(), null); XMPUtil.writeDublinCore(document, resolvedEntries, null); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw == null) { meta = new XMPMetadata(); } else { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); // Remove all current Bibtex-schemas List<XMPSchema> schemas = 
meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; bib.getElement().getParentNode().removeChild(bib.getElement()); } for (BibEntry e : resolvedEntries) { XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta); meta.addSchema(bibtex); bibtex.setBibtexEntry(e, null); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); // Save try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage()); } } }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/**
 * Reads the XMP metadata of the given document, registering the bibtex
 * namespace mapping.
 *
 * @return the parsed metadata, or {@code null} when the document has none
 */
private static XMPMetadata getXMPMetadata(PDDocument document) throws IOException {
    PDMetadata raw = document.getDocumentCatalog().getMetadata();
    if (raw == null) {
        return null;
    }
    XMPMetadata result = new XMPMetadata(XMLUtil.parse(raw.createInputStream()));
    result.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return result;
}
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntries as DublinCore XMP Schemas * //from ww w . ja v a 2s . c om * Existing DublinCore schemas in the document are removed * * @param document * The pdf document to write to. * @param entries * The Bibtex entries that are written as schemas * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @throws IOException * @throws TransformerException */ @SuppressWarnings("unchecked") private static void writeDublinCore(PDDocument document, Collection<BibtexEntry> entries, BibtexDatabase database) throws IOException, TransformerException { if (database != null) { entries = database.resolveForStrings(entries, false); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } // Remove all current Dublin-Core schemas List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { schema.getElement().getParentNode().removeChild(schema.getElement()); } for (BibtexEntry entry : entries) { XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta); XMPUtil.writeToDCSchema(dcSchema, entry, null); meta.addSchema(dcSchema); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the XMP-stream of the given * PDF-file./*ww w . ja v a 2 s. c om*/ * * Throws an IOException if the file cannot be read or written, so the user * can remove a lock or cancel the operation. * * The method will overwrite existing BibTeX-XMP-data, but keep other * existing metadata. * * @param file * The file to write the entries to. * @param bibtexEntries * The entries to write to the file. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @param writePDFInfo * Write information also in PDF document properties * @throws TransformerException * If the entry was malformed or unsupported. * @throws IOException * If the file could not be written to or could not be found. */ @SuppressWarnings("unchecked") public static void writeXMP(File file, Collection<BibtexEntry> bibtexEntries, BibtexDatabase database, boolean writePDFInfo) throws IOException, TransformerException { if (database != null) { bibtexEntries = database.resolveForStrings(bibtexEntries, false); } PDDocument document = null; try { document = PDDocument.load(file.getAbsoluteFile()); if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document."); } if (writePDFInfo && (bibtexEntries.size() == 1)) { XMPUtil.writeDocumentInformation(document, bibtexEntries.iterator().next(), null); XMPUtil.writeDublinCore(document, bibtexEntries, null); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); // Remove all current Bibtex-schemas List<XMPSchema> schemas = 
meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; bib.getElement().getParentNode().removeChild(bib.getElement()); } for (BibtexEntry e : bibtexEntries) { XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta); meta.addSchema(bibtex); bibtex.setBibtexEntry(e, null); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); // Save try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage()); } } finally { if (document != null) { document.close(); } } }
From source file:org.exoplatform.services.document.impl.PDFDocumentReader.java
License:Open Source License
/**
 * Extracts document properties (Dublin-Core-style keys) from a PDF stream.
 *
 * Prefers the document's XMP metadata; when that yields nothing, falls back
 * to the PDF information dictionary. Each individual field is read inside
 * its own try/catch so one bad field never aborts the whole extraction.
 * Both the loaded document and the input stream are closed before return.
 *
 * @param is the PDF content to read; must not be null
 * @return the extracted properties (possibly empty)
 * @throws IOException if the stream cannot be parsed as a PDF
 * @throws DocumentReadException if the document is encrypted and cannot be
 *         decrypted with an empty password
 */
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException {
    try {
        // Runs under doPrivileged so extraction works even when callers have
        // restricted permissions; checked exceptions surface as
        // PrivilegedActionException and are unwrapped below.
        return SecurityHelper.doPrivilegedExceptionAction(new PrivilegedExceptionAction<Properties>() {
            public Properties run() throws Exception {
                if (is == null) {
                    throw new IllegalArgumentException("InputStream is null.");
                }
                PDDocument pdDocument = PDDocument.load(is);
                Properties props = new Properties();
                try {
                    if (pdDocument.isEncrypted()) {
                        // Attempt decryption with an empty owner/user password;
                        // anything else is reported as unreadable.
                        try {
                            pdDocument.decrypt("");
                        } catch (InvalidPasswordException e) {
                            throw new DocumentReadException("The pdf document is encrypted.", e);
                        } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
                            throw new DocumentReadException(e.getMessage(), e);
                        }
                    }
                    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
                    PDMetadata meta = catalog.getMetadata();
                    if (meta != null) {
                        XMPMetadata metadata = meta.exportXMPMetadata();
                        // Dublin Core schema: title, description, creators, dates.
                        // fixEncoding is a project helper; presumably it repairs
                        // mis-encoded text — TODO confirm its contract.
                        XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
                        if (dc != null) {
                            try {
                                if (dc.getTitle() != null)
                                    props.put(DCMetaData.TITLE, fixEncoding(dc.getTitle()));
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            try {
                                if (dc.getDescription() != null)
                                    props.put(DCMetaData.DESCRIPTION, fixEncoding(dc.getDescription()));
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                // Properties is a map: with several creators,
                                // only the last one survives.
                                if (dc.getCreators() != null) {
                                    for (String creator : dc.getCreators()) {
                                        props.put(DCMetaData.CREATOR, fixEncoding(creator));
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {
                                // Same last-wins behaviour for multiple dates.
                                if (dc.getDates() != null) {
                                    for (Calendar date : dc.getDates()) {
                                        props.put(DCMetaData.DATE, date);
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getDate failed: " + e.getMessage());
                            }
                        }
                        // Adobe PDF schema: keywords -> SUBJECT, producer -> PUBLISHER.
                        XMPSchemaPDF pdf = metadata.getPDFSchema();
                        if (pdf != null) {
                            try {
                                if (pdf.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, fixEncoding(pdf.getKeywords()));
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (pdf.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, fixEncoding(pdf.getProducer()));
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                        }
                        // XMP Basic schema: create/modify dates both map to DATE,
                        // so modify date overwrites create date when both exist.
                        XMPSchemaBasic basic = metadata.getBasicSchema();
                        if (basic != null) {
                            try {
                                if (basic.getCreateDate() != null)
                                    props.put(DCMetaData.DATE, basic.getCreateDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (basic.getModifyDate() != null)
                                    props.put(DCMetaData.DATE, basic.getModifyDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            // DCMetaData.PUBLISHER - basic.getCreatorTool()
                        }
                    }
                    if (props.isEmpty()) {
                        // The pdf doesn't contain any metadata, try to use the document
                        // information instead
                        PDDocumentInformation docInfo = pdDocument.getDocumentInformation();
                        if (docInfo != null) {
                            try {
                                if (docInfo.getAuthor() != null)
                                    props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor());
                            } catch (Exception e) {
                                LOG.warn("getAuthor failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getCreationDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreator() != null)
                                    props.put(DCMetaData.CREATOR, docInfo.getCreator());
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getModificationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getModificationDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getSubject() != null)
                                    props.put(DCMetaData.DESCRIPTION, docInfo.getSubject());
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getTitle() != null)
                                    props.put(DCMetaData.TITLE, docInfo.getTitle());
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            // docInfo.getTrapped();
                        }
                    }
                } finally {
                    // Close the document first, then the caller's stream; a
                    // failure closing the stream is only traced, not rethrown.
                    if (pdDocument != null) {
                        pdDocument.close();
                    }
                    if (is != null) {
                        try {
                            is.close();
                        } catch (IOException e) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("An exception occurred: " + e.getMessage());
                            }
                        }
                    }
                }
                return props;
            }
        });
    } catch (PrivilegedActionException pae) {
        // Unwrap the privileged-action wrapper, preserving the original
        // exception type where the signature allows it.
        Throwable cause = pae.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        } else {
            throw new RuntimeException(cause);
        }
    }
}