Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentInformation

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument getDocumentInformation.

Prototype

public PDDocumentInformation getDocumentInformation()

Source Link

Document

This will get the document info dictionary.

Usage

From source file:org.xstudiosys.pdfxmp.Main.java

License:Open Source License

/**
 * Parses the PDF read from {@code in}, re-attaches its document information
 * dictionary and saves the document to {@code outputFile}.
 *
 * The {@code xmp} argument is not read at present: the loop that would copy
 * values derived from it into the information dictionary is commented out.
 *
 * @param in         stream the PDF is parsed from; this method does not close it
 * @param outputFile target passed to {@code PDDocument.save}
 * @param xmp        XMP bytes (currently unused, see above)
 * @throws IOException         if one of the PDFBox calls reports an I/O failure
 * @throws COSVisitorException if one of the PDFBox calls reports a write failure
 */
public static void writeInfoDictionary(FileInputStream in, String outputFile, byte[] xmp)
        throws IOException, COSVisitorException {

    PDFParser parser = new PDFParser(in);
    parser.parse();

    PDDocument document = parser.getPDDocument();
    try {
        PDDocumentInformation info = document.getDocumentInformation();
        /*
        for (Entry<String, String> entry : XmpUtils.toInfo(xmp).entrySet()) {
           info.setCustomMetadataValue(entry.getKey(), entry.getValue());
        }
        */
        document.setDocumentInformation(info);
        document.save(outputFile);
    } finally {
        // Close the document even when setDocumentInformation() or save() throws,
        // so a failed write does not leak the parsed document.
        document.close();
    }
}

From source file:org.xstudiosys.pdfxmp.MarkBuilder.java

License:Open Source License

/**
 * Builds an XMP metadata object from the values of the document information
 * dictionary and stores it on the document catalog.
 *
 * Any exception raised along the way is caught and its stack trace printed;
 * nothing is propagated to the caller.
 *
 * @param document the document whose catalog receives the metadata stream
 */
public void onComplete(PDDocument document) {
    try {
        final PDDocumentCatalog docCatalog = document.getDocumentCatalog();
        final PDDocumentInformation docInfo = document.getDocumentInformation();

        final XMPMetadata xmp = new XMPMetadata();

        // PDF schema: keywords and producer.
        final XMPSchemaPDF pdf = xmp.addPDFSchema();
        pdf.setKeywords(docInfo.getKeywords());
        pdf.setProducer(docInfo.getProducer());

        // Basic schema: dates and creator tool; the metadata date is "now".
        final XMPSchemaBasic basic = xmp.addBasicSchema();
        basic.setModifyDate(docInfo.getModificationDate());
        basic.setCreateDate(docInfo.getCreationDate());
        basic.setCreatorTool(docInfo.getCreator());
        basic.setMetadataDate(new GregorianCalendar());

        // Dublin Core schema: title, a fixed creator entry and the description.
        final XMPSchemaDublinCore dublinCore = xmp.addDublinCoreSchema();
        dublinCore.setTitle(docInfo.getTitle());
        dublinCore.addCreator("PDFBox");
        dublinCore.setDescription(docInfo.getSubject());

        // Wrap the XMP object in a metadata stream and hang it on the catalog.
        final PDMetadata xmpStream = new PDMetadata(document);
        xmpStream.importXMPMetadata(xmp);
        docCatalog.setMetadata(xmpStream);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Reads BibTeX entries from the metadata of a PDF supplied as a stream.
 *
 * Sources are tried in this order, each one only if nothing was found so far:
 * the BibTeX XMP schema, the Dublin Core XMP schema, and finally the document
 * information dictionary. The document is always closed before returning.
 *
 * @param inputStream
 *            The input stream to read the PDF from.
 *
 * @return the entries that were found, or {@code null} if there were none.
 *
 * @throws IOException
 *             Throws an IOException if the file cannot be read, so the user
 *             can remove a lock or cancel the operation.
 */
@SuppressWarnings("unchecked")
public static List<BibtexEntry> readXMP(InputStream inputStream) throws IOException {

    final List<BibtexEntry> entries = new LinkedList<BibtexEntry>();
    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(inputStream);
        if (pdf.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
        }

        final XMPMetadata xmp = getXMPMetadata(pdf);

        // XMP-based sources are only consulted when an XMP packet is present.
        if (xmp != null) {

            // First choice: schemas in the BibTeX namespace.
            List<XMPSchema> bibtexSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema schema : bibtexSchemas) {
                entries.add(((XMPSchemaBibtex) schema).getBibtexEntry());
            }

            // Second choice: schemas in the Dublin Core namespace.
            if (entries.isEmpty()) {
                List<XMPSchema> dublinCoreSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema schema : dublinCoreSchemas) {
                    BibtexEntry fromDublinCore = getBibtexEntryFromDublinCore((XMPSchemaDublinCore) schema);
                    if (fromDublinCore != null) {
                        entries.add(fromDublinCore);
                    }
                }
            }
        }

        // Last resort (also used when there is no XMP at all): the info dictionary.
        if (entries.isEmpty()) {
            BibtexEntry fromInfo = getBibtexEntryFromDocumentInformation(pdf.getDocumentInformation());
            if (fromInfo != null) {
                entries.add(fromInfo);
            }
        }
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }

    // Callers receive null rather than an empty list when nothing was found.
    return entries.isEmpty() ? null : entries;
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Try to write the given BibTexEntry in the Document Information (the
 * properties of the pdf)./*  ww  w. j  a  va  2 s. co m*/
 * 
 * Existing fields values are overriden if the bibtex entry has the
 * corresponding value set.
 * 
 * @param document
 *            The pdf document to write to.
 * @param entry
 *            The Bibtex entry that is written into the PDF properties. *
 * @param database
 *            maybenull An optional database which the given bibtex entries
 *            belong to, which will be used to resolve strings. If the
 *            database is null the strings will not be resolved.
 */
public static void writeDocumentInformation(PDDocument document, BibtexEntry entry, BibtexDatabase database) {

    PDDocumentInformation di = document.getDocumentInformation();

    if (database != null)
        entry = database.resolveForStrings(entry, false);

    // Query privacy filter settings
    /*
    JabRefPreferences prefs = JabRefPreferences.getInstance();
    boolean useXmpPrivacyFilter =
       prefs.getBoolean("useXmpPrivacyFilter");
    // Fields for which not to write XMP data later on:
    TreeSet<String> filters = new TreeSet<String>(Arrays.asList(prefs.getStringArray(JabRefPreferences.XMP_PRIVACY_FILTERS)));
        */
    // Set all the values including key and entryType
    Set<String> fields = entry.getAllFields();

    for (String field : fields) {
        /*
                 if (useXmpPrivacyFilter && filters.contains(field)) {
                    // erase field instead of adding it
                    if (field.equals("author")) {
                    di.setAuthor(null);
                    } else if (field.equals("title")) {
                    di.setTitle(null);
                    } else if (field.equals("keywords")) {
                    di.setKeywords(null);
                    } else if (field.equals("abstract")) {
                    di.setSubject(null);
                    } else {
                    di.setCustomMetadataValue("bibtex/" + field,
                                                  null);
                    }
                    continue;
                 }
        */
        if (field.equals("author")) {
            di.setAuthor(entry.getField("author"));
        } else if (field.equals("title")) {
            di.setTitle(entry.getField("title"));
        } else if (field.equals("keywords")) {
            di.setKeywords(entry.getField("keywords"));
        } else if (field.equals("abstract")) {
            di.setSubject(entry.getField("abstract"));
        } else {
            di.setCustomMetadataValue("bibtex/" + field, entry.getField(field));
        }
    }
    di.setCustomMetadataValue("bibtex/entrytype", entry.getType().getName());
}

From source file:se.mithlond.services.content.impl.ejb.report.PdfReportServiceBean.java

License:Apache License

/**
 * {@inheritDoc}/*from  w  w  w . j ava 2 s.c o  m*/
 */
@Override
public PDDocument createDocument(@NotNull final Membership activeMembership, @NotNull final String title) {

    // Check sanity
    Validate.notNull(activeMembership, "activeMembership");
    Validate.notEmpty(title, "title");

    // Create the document and add some metadata to it.
    final PDDocument toReturn = new PDDocument();
    final PDDocumentInformation pdd = toReturn.getDocumentInformation();

    pdd.setAuthor("" + activeMembership.getAlias());
    pdd.setProducer("Nazgl Services Excel Report Generator");
    pdd.setCreationDate(Calendar.getInstance());
    pdd.setTitle(title);

    // All Done.
    return toReturn;
}

From source file:se.mithlond.services.content.impl.ejb.report.PdfReportServiceBeanTest.java

License:Apache License

@Test
public void validateCreatingDocument() {

    // Assemble: nothing beyond the fixtures (unitUnderTest, memHaxx).

    // Act
    final PDDocument created = unitUnderTest.createDocument(memHaxx, "TestDocument");

    // Assert: a document exists, and its information dictionary carries the
    // membership alias as author plus a creation date.
    Assert.assertNotNull(created);

    final PDDocumentInformation metadata = created.getDocumentInformation();
    Assert.assertNotNull(metadata);
    Assert.assertEquals(memHaxx.getAlias(), metadata.getAuthor());
    Assert.assertNotNull(metadata.getCreationDate());
}

From source file:se.streamsource.streamflow.web.application.pdf.CasePdfGenerator.java

License:Apache License

/**
 * Finishes the case document, stamps header, page numbers and basic
 * document information on it and, when a template URI is set, lays the
 * template underneath the generated pages.
 *
 * A failure while fetching or applying the template is caught and its stack
 * trace printed; the document produced so far is returned in that case.
 *
 * @return the generated document
 * @throws IOException declared by the signature; propagated from the calls made before the template step
 */
public PDDocument getPdf() throws IOException {
    document.closeAndReturn();
    PDDocument result = document.generateHeaderAndPageNumbers(headerFont, caseId,
            bundle.getString("printDate") + ": " + printedOn);

    // Creator, creation date ("now") and title (the case id).
    result.getDocumentInformation().setCreator("Streamflow");
    result.getDocumentInformation().setCreationDate(Calendar.getInstance());
    result.getDocumentInformation().setTitle(caseId);

    // Without a template there is nothing left to do.
    if (templateUri == null) {
        return result;
    }

    try {
        final String attachmentId = new URI(templateUri).getSchemeSpecificPart();

        // Pull the template attachment into memory.
        final ByteArrayOutputStream templateBytes = new ByteArrayOutputStream();
        store.attachment(attachmentId).transferTo(Outputs.byteBuffer(templateBytes));

        final Underlay templateUnderlay = new Underlay();
        result = templateUnderlay.underlay(result, new ByteArrayInputStream(templateBytes.toByteArray()));

    } catch (Exception e) {

        e.printStackTrace();
    }

    return result;
}

From source file:uk.bl.wa.tika.parser.pdf.pdfbox.PDFParser.java

License:Apache License

/**
 * Copies document-level metadata from a PDF into the supplied metadata object.
 *
 * In order, this method:
 * 1. copies the standard information-dictionary values (title, author, ...),
 * 2. copies every remaining information-dictionary key under its own name,
 * 3. records version, page count and encryption flag,
 * 4. if the catalog has an XMP stream, records PDF/A part and conformance,
 * 5. if the catalog has an "Extensions" dictionary, records the Adobe
 *    extension level or the name of a non-Adobe extension.
 *
 * "pdf:version" is written at up to three points (steps 3, 4 and 5), always
 * with the same key; presumably each later write replaces the earlier value -
 * confirm against Metadata.set.
 *
 * @param document the PDF to read from
 * @param metadata the object the values are written to
 * @throws TikaException declared by the signature; no throw statement is visible in this method
 */
private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException {
    PDDocumentInformation info = document.getDocumentInformation();
    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    addMetadata(metadata, Metadata.TITLE, info.getTitle());
    addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
    addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
    addMetadata(metadata, "pdf:creator", info.getCreator());
    addMetadata(metadata, "pdf:producer", info.getProducer());
    addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
    addMetadata(metadata, "trapped", info.getTrapped());
    // The creation date is recorded twice, under two different keys.
    addMetadata(metadata, "created", info.getCreationDate());
    addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
    Calendar modified = info.getModificationDate();
    addMetadata(metadata, Metadata.LAST_MODIFIED, modified);

    // All remaining metadata is custom
    // Copy this over as-is
    // (the list below names the information-dictionary keys already handled above)
    List<String> handledMetadata = Arrays.asList(new String[] { "Author", "Creator", "CreationDate", "ModDate",
            "Keywords", "Producer", "Subject", "Title", "Trapped" });
    if (info.getCOSObject() != null && info.getCOSObject().keySet() != null) {
        for (COSName key : info.getCOSObject().keySet()) {
            String name = key.getName();
            if (!handledMetadata.contains(name)) {
                addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key));
            }
        }
    }
    // ANJ Extensions:
    //
    //
    // Add other data of interest:
    // First write of "pdf:version": the version reported by the underlying document.
    metadata.set("pdf:version", "" + document.getDocument().getVersion());
    metadata.set("pdf:numPages", "" + document.getNumberOfPages());
    //metadata.set("pdf:cryptoMode", ""+getCryptoModeAsString(reader));
    //metadata.set("pdf:openedWithFullPermissions", ""+reader.isOpenedWithFullPermissions());
    metadata.set("pdf:encrypted", "" + document.isEncrypted());
    //metadata.set("pdf:metadataEncrypted", ""+document.isMetadataEncrypted());
    //metadata.set("pdf:128key", ""+reader.is128Key());
    //metadata.set("pdf:tampered", ""+reader.isTampered());
    try {
        if (document.getDocumentCatalog().getMetadata() != null) {
            XMPMetadata xmp = XMPMetadata.load(document.getDocumentCatalog().getMetadata().exportXMPMetadata());
            // There is a special class for grabbing data in the PDF schema - not sure it will add much here:
            // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here.
            // NOTE(review): pdfxmp is assigned but never read in this method.
            XMPSchemaPDF pdfxmp = xmp.getPDFSchema();
            // Added a PDF/A schema class:
            xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
            XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
            if (pdfaxmp != null) {
                metadata.set("pdfaid:part", pdfaxmp.getPart());
                metadata.set("pdfaid:conformance", pdfaxmp.getConformance());
                // NOTE(review): getConformance() is dereferenced without a null check;
                // if it can return null this line throws - confirm against XMPSchemaPDFA.
                String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase();
                //metadata.set("pdfa:version", version );
                // Second write of "pdf:version": the PDF/A designation, e.g. "A-" + part + conformance.
                metadata.set("pdf:version", version);
            }
            // TODO WARN if this XMP version is inconsistent with document header version?
        }
    } catch (IOException e) {
        // XMP problems are logged and recorded as a metadata value; extraction continues.
        log.error("XMP Parsing failed: " + e);
        metadata.set("pdf:metadata-xmp-parse-failed", "" + e);
    }

    // Attempt to determine Adobe extension level, if present:
    COSDictionary root = document.getDocumentCatalog().getCOSObject();
    // NOTE(review): both COSDictionary casts below are unchecked; a value of another
    // type under these keys would raise ClassCastException - confirm whether that can occur.
    COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensions != null) {
        for (COSName extName : extensions.keySet()) {
            // If it's an Adobe one, interpret it to determine the extension level:
            if (extName.equals(COSName.getPDFName("ADBE"))) {
                COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
                if (adobeExt != null) {
                    String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                    int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                    // Third write of "pdf:version": base version plus Adobe extension level.
                    metadata.set("pdf:version", baseVersion + " Adobe Extension Level " + el);
                }
                // TODO WARN if this embedded version is inconsistent with document header version?
            } else {
                // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
                metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            }
        }
    }
    // End Of ANJ Extensions.
}

From source file:zhaw.PDFIndexer.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 *
 * The extracted text is added as the contents field, selected values of the
 * document information dictionary are added as text fields, and the first
 * 500 characters of the text (or fewer, if the text is shorter) are added
 * as an unindexed summary. The PDF is always closed before returning.
 *
 * @param document
 *            The document to add the contents to.
 * @param is
 *            The stream to get the contents from.
 * @param documentLocation
 *            The location of the document, used just for debug messages.
 * @throws IOException
 *             If there is an error parsing the document. Decryption and
 *             password failures are rethrown as IOException with the
 *             original exception attached as the cause.
 */
private void addContent(Document document, InputStream is, String documentLocation) throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);
        if (pdfDocument.isEncrypted()) {
            // Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        // create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        PDFTextStripper stripper = new PDFTextStripper();
        try {
            stripper.writeText(pdfDocument, writer);

        } catch (Exception e) {
            // Best effort: whatever text was written before the failure is still indexed.
            System.out.println("Error in stripper.writeText()");
        }
        String contents = writer.getBuffer().toString();

        StringReader reader = new StringReader(contents);
        addTextField(document, Indexer.contents, reader);
        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addTextField(document, Indexer.Author, info.getAuthor());
            try {
                addTextField(document, Indexer.created, info.getCreationDate());
            } catch (IOException ignored) {
                // ignore, bad date but continue with indexing
            }

            addTextField(document, Indexer.keywords, info.getKeywords());
            try {
                addTextField(document, Indexer.modified, info.getModificationDate());
            } catch (IOException ignored) {
                // ignore, bad date but continue with indexing
            }
            addTextField(document, "Subject", info.getSubject());
            addTextField(document, Indexer.Title, info.getTitle());
        }
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        // Add the summary as an UnIndexed field, so that it is stored and
        // returned
        // with hit documents for display.
        addUnindexedField(document, Indexer.summary, summary);
    } catch (CryptographyException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        // they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}