Usage examples for org.apache.pdfbox.pdmodel.PDDocument.getDocumentInformation()
public PDDocumentInformation getDocumentInformation()
From source file:org.xstudiosys.pdfxmp.Main.java
License:Open Source License
public static void writeInfoDictionary(FileInputStream in, String outputFile, byte[] xmp) throws IOException, COSVisitorException { PDFParser parser = new PDFParser(in); parser.parse();// www . jav a 2s . c om PDDocument document = parser.getPDDocument(); PDDocumentInformation info = document.getDocumentInformation(); /* for (Entry<String, String> entry : XmpUtils.toInfo(xmp).entrySet()) { info.setCustomMetadataValue(entry.getKey(), entry.getValue()); } */ document.setDocumentInformation(info); document.save(outputFile); document.close(); }
From source file:org.xstudiosys.pdfxmp.MarkBuilder.java
License:Open Source License
/**
 * Builds an XMP metadata packet from the document information dictionary of
 * {@code document} and installs it on the document catalog.
 *
 * <p>Keywords and producer go to the PDF schema; modification date, creation
 * date and creator tool go to the basic schema (together with the current
 * time as metadata date); title and subject go to the Dublin Core schema,
 * whose creator is always set to "PDFBox".
 *
 * <p>This is best-effort: any exception is caught and its stack trace printed,
 * in which case the catalog metadata is left untouched.
 *
 * @param document the document whose catalog receives the generated metadata
 */
public void onComplete(PDDocument document) {
    try {
        PDDocumentCatalog docCatalog = document.getDocumentCatalog();
        PDDocumentInformation docInfo = document.getDocumentInformation();
        XMPMetadata xmp = new XMPMetadata();

        // PDF schema
        XMPSchemaPDF pdf = xmp.addPDFSchema();
        pdf.setKeywords(docInfo.getKeywords());
        pdf.setProducer(docInfo.getProducer());

        // XMP basic schema
        XMPSchemaBasic basic = xmp.addBasicSchema();
        basic.setModifyDate(docInfo.getModificationDate());
        basic.setCreateDate(docInfo.getCreationDate());
        basic.setCreatorTool(docInfo.getCreator());
        basic.setMetadataDate(new GregorianCalendar());

        // Dublin Core schema
        XMPSchemaDublinCore dublinCore = xmp.addDublinCoreSchema();
        dublinCore.setTitle(docInfo.getTitle());
        dublinCore.addCreator("PDFBox");
        dublinCore.setDescription(docInfo.getSubject());

        // Wrap the packet in a metadata stream and attach it to the catalog.
        PDMetadata xmpStream = new PDMetadata(document);
        xmpStream.importXMPMetadata(xmp);
        docCatalog.setMetadata(xmpStream);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:org.xstudiosys.pdfxmp.XMPUtil.java
License:Open Source License
/** * Try to read the given BibTexEntry from the XMP-stream of the given * inputstream containing a PDF-file.//from w w w. j a v a 2s .co m * * @param inputStream * The inputstream to read from. * * @throws IOException * Throws an IOException if the file cannot be read, so the user * than remove a lock or cancel the operation. */ @SuppressWarnings("unchecked") public static List<BibtexEntry> readXMP(InputStream inputStream) throws IOException { List<BibtexEntry> result = new LinkedList<BibtexEntry>(); PDDocument document = null; try { document = PDDocument.load(inputStream); if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); } XMPMetadata meta = getXMPMetadata(document); // If we did not find any XMP metadata, search for non XMP metadata if (meta != null) { List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; result.add(bib.getBibtexEntry()); } // If we did not find anything have a look if a Dublin Core exists if (result.size() == 0) { schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schema; BibtexEntry entry = getBibtexEntryFromDublinCore(dc); if (entry != null) result.add(entry); } } } if (result.size() == 0) { BibtexEntry entry = getBibtexEntryFromDocumentInformation(document.getDocumentInformation()); if (entry != null) result.add(entry); } } finally { if (document != null) document.close(); } // return null, if no metadata was found if (result.size() == 0) return null; return result; }
From source file:org.xstudiosys.pdfxmp.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the Document Information (the * properties of the pdf)./* ww w. j a va 2 s. co m*/ * * Existing fields values are overriden if the bibtex entry has the * corresponding value set. * * @param document * The pdf document to write to. * @param entry * The Bibtex entry that is written into the PDF properties. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. */ public static void writeDocumentInformation(PDDocument document, BibtexEntry entry, BibtexDatabase database) { PDDocumentInformation di = document.getDocumentInformation(); if (database != null) entry = database.resolveForStrings(entry, false); // Query privacy filter settings /* JabRefPreferences prefs = JabRefPreferences.getInstance(); boolean useXmpPrivacyFilter = prefs.getBoolean("useXmpPrivacyFilter"); // Fields for which not to write XMP data later on: TreeSet<String> filters = new TreeSet<String>(Arrays.asList(prefs.getStringArray(JabRefPreferences.XMP_PRIVACY_FILTERS))); */ // Set all the values including key and entryType Set<String> fields = entry.getAllFields(); for (String field : fields) { /* if (useXmpPrivacyFilter && filters.contains(field)) { // erase field instead of adding it if (field.equals("author")) { di.setAuthor(null); } else if (field.equals("title")) { di.setTitle(null); } else if (field.equals("keywords")) { di.setKeywords(null); } else if (field.equals("abstract")) { di.setSubject(null); } else { di.setCustomMetadataValue("bibtex/" + field, null); } continue; } */ if (field.equals("author")) { di.setAuthor(entry.getField("author")); } else if (field.equals("title")) { di.setTitle(entry.getField("title")); } else if (field.equals("keywords")) { di.setKeywords(entry.getField("keywords")); } else if (field.equals("abstract")) { di.setSubject(entry.getField("abstract")); } else { 
di.setCustomMetadataValue("bibtex/" + field, entry.getField(field)); } } di.setCustomMetadataValue("bibtex/entrytype", entry.getType().getName()); }
From source file:se.mithlond.services.content.impl.ejb.report.PdfReportServiceBean.java
License:Apache License
/** * {@inheritDoc}/*from w w w . j ava 2 s.c o m*/ */ @Override public PDDocument createDocument(@NotNull final Membership activeMembership, @NotNull final String title) { // Check sanity Validate.notNull(activeMembership, "activeMembership"); Validate.notEmpty(title, "title"); // Create the document and add some metadata to it. final PDDocument toReturn = new PDDocument(); final PDDocumentInformation pdd = toReturn.getDocumentInformation(); pdd.setAuthor("" + activeMembership.getAlias()); pdd.setProducer("Nazgl Services Excel Report Generator"); pdd.setCreationDate(Calendar.getInstance()); pdd.setTitle(title); // All Done. return toReturn; }
From source file:se.mithlond.services.content.impl.ejb.report.PdfReportServiceBeanTest.java
License:Apache License
@Test public void validateCreatingDocument() { // Assemble/*w ww. java 2 s.c o m*/ // Act final PDDocument result = unitUnderTest.createDocument(memHaxx, "TestDocument"); // Assert Assert.assertNotNull(result); final PDDocumentInformation docInfo = result.getDocumentInformation(); Assert.assertNotNull(docInfo); Assert.assertEquals(memHaxx.getAlias(), docInfo.getAuthor()); Assert.assertNotNull(docInfo.getCreationDate()); }
From source file:se.streamsource.streamflow.web.application.pdf.CasePdfGenerator.java
License:Apache License
public PDDocument getPdf() throws IOException { document.closeAndReturn();//w w w .j a va 2s . c o m PDDocument generatedDoc = document.generateHeaderAndPageNumbers(headerFont, caseId, bundle.getString("printDate") + ": " + printedOn); generatedDoc.getDocumentInformation().setCreator("Streamflow"); Calendar calendar = Calendar.getInstance(); generatedDoc.getDocumentInformation().setCreationDate(calendar); generatedDoc.getDocumentInformation().setTitle(caseId); if (templateUri != null) { String attachmentId; try { attachmentId = new URI(templateUri).getSchemeSpecificPart(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); store.attachment(attachmentId).transferTo(Outputs.byteBuffer(baos)); Underlay underlay = new Underlay(); generatedDoc = underlay.underlay(generatedDoc, new ByteArrayInputStream(baos.toByteArray())); } catch (Exception e) { e.printStackTrace(); } } return generatedDoc; }
From source file:uk.bl.wa.tika.parser.pdf.pdfbox.PDFParser.java
License:Apache License
private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException { PDDocumentInformation info = document.getDocumentInformation(); metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); addMetadata(metadata, Metadata.TITLE, info.getTitle()); addMetadata(metadata, Metadata.AUTHOR, info.getAuthor()); addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords()); addMetadata(metadata, "pdf:creator", info.getCreator()); addMetadata(metadata, "pdf:producer", info.getProducer()); addMetadata(metadata, Metadata.SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); // All remaining metadata is custom // Copy this over as-is List<String> handledMetadata = Arrays.asList(new String[] { "Author", "Creator", "CreationDate", "ModDate", "Keywords", "Producer", "Subject", "Title", "Trapped" }); if (info.getCOSObject() != null && info.getCOSObject().keySet() != null) { for (COSName key : info.getCOSObject().keySet()) { String name = key.getName(); if (!handledMetadata.contains(name)) { addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key)); }// w w w .j a v a2 s . 
c om } } // ANJ Extensions: // // // Add other data of interest: metadata.set("pdf:version", "" + document.getDocument().getVersion()); metadata.set("pdf:numPages", "" + document.getNumberOfPages()); //metadata.set("pdf:cryptoMode", ""+getCryptoModeAsString(reader)); //metadata.set("pdf:openedWithFullPermissions", ""+reader.isOpenedWithFullPermissions()); metadata.set("pdf:encrypted", "" + document.isEncrypted()); //metadata.set("pdf:metadataEncrypted", ""+document.isMetadataEncrypted()); //metadata.set("pdf:128key", ""+reader.is128Key()); //metadata.set("pdf:tampered", ""+reader.isTampered()); try { if (document.getDocumentCatalog().getMetadata() != null) { XMPMetadata xmp = XMPMetadata.load(document.getDocumentCatalog().getMetadata().exportXMPMetadata()); // There is a special class for grabbing data in the PDF schema - not sure it will add much here: // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here. XMPSchemaPDF pdfxmp = xmp.getPDFSchema(); // Added a PDF/A schema class: xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class); XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class); if (pdfaxmp != null) { metadata.set("pdfaid:part", pdfaxmp.getPart()); metadata.set("pdfaid:conformance", pdfaxmp.getConformance()); String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(); //metadata.set("pdfa:version", version ); metadata.set("pdf:version", version); } // TODO WARN if this XMP version is inconsistent with document header version? 
} } catch (IOException e) { log.error("XMP Parsing failed: " + e); metadata.set("pdf:metadata-xmp-parse-failed", "" + e); } // Attempt to determine Adobe extension level, if present: COSDictionary root = document.getDocumentCatalog().getCOSObject(); COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions")); if (extensions != null) { for (COSName extName : extensions.keySet()) { // If it's an Adobe one, interpret it to determine the extension level: if (extName.equals(COSName.getPDFName("ADBE"))) { COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName); if (adobeExt != null) { String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion")); int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel")); metadata.set("pdf:version", baseVersion + " Adobe Extension Level " + el); } // TODO WARN if this embedded version is inconsistent with document header version? } else { // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'. metadata.set("pdf:foundNonAdobeExtensionName", extName.getName()); } } } // End Of ANJ Extensions. }
From source file:zhaw.PDFIndexer.java
License:Apache License
/** * This will add the contents to the lucene document. * // ww w.j a v a 2 s . c om * @param document * The document to add the contents to. * @param is * The stream to get the contents from. * @param documentLocation * The location of the document, used just for debug messages. * @throws IOException * If there is an error parsing the document. */ private void addContent(Document document, InputStream is, String documentLocation) throws IOException { PDDocument pdfDocument = null; PDFTextStripper stripper; try { pdfDocument = PDDocument.load(is); if (pdfDocument.isEncrypted()) { // Just try using the default password and move on pdfDocument.decrypt(""); } // create a writer where to append the text content. StringWriter writer = new StringWriter(); stripper = new PDFTextStripper(); try { stripper.writeText(pdfDocument, writer); } catch (Exception e) { System.out.println("Error in stripper.writeText()"); } String contents = writer.getBuffer().toString(); StringReader reader = new StringReader(contents); addTextField(document, Indexer.contents, reader); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null) { addTextField(document, Indexer.Author, info.getAuthor()); try { addTextField(document, Indexer.created, info.getCreationDate()); } catch (IOException io) { // ignore, bad date but continue with indexing } addTextField(document, Indexer.keywords, info.getKeywords()); try { addTextField(document, Indexer.modified, info.getModificationDate()); } catch (IOException io) { // ignore, bad date but continue with indexing } addTextField(document, "Subject", info.getSubject()); addTextField(document, Indexer.Title, info.getTitle()); } int summarySize = Math.min(contents.length(), 500); String summary = contents.substring(0, summarySize); // Add the summary as an UnIndexed field, so that it is stored and // returned // with hit documents for display. 
addUnindexedField(document, Indexer.summary, summary); } catch (CryptographyException e) { throw new IOException("Error decrypting document(" + documentLocation + "): " + e); } catch (InvalidPasswordException e) { // they didn't suppply a password and the default of "" was wrong. throw new IOException( "Error: The document(" + documentLocation + ") is encrypted and will not be indexed."); } finally { if (pdfDocument != null) { pdfDocument.close(); } } }