Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentInformation

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentInformation

Introduction

This page collects usage examples for the getDocumentInformation method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public PDDocumentInformation getDocumentInformation() 

Source Link

Document

This will get the document info dictionary.

Usage

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * Copies the metadata of a PDF document (title, author, subject, keywords
 * and last modification date) into the given parser document.
 * <p>
 * Fields that are missing or empty in the PDF are left untouched on
 * {@code parserDoc}. If the PDF has no document information at all, nothing
 * is done.
 *
 * @param parserDoc the parser document that receives the metadata
 * @param pddDoc the PDF document to read the metadata from
 * @throws IOException declared for callers; see the PDFBox accessors used below
 */
protected void extractMetaData(IParserDocument parserDoc, PDDocument pddDoc) throws IOException {
    // extract metadata
    final PDDocumentInformation metadata = pddDoc.getDocumentInformation();
    if (metadata == null) {
        return;
    }

    // document title
    final String title = metadata.getTitle();
    if (title != null && title.length() > 0) {
        parserDoc.setTitle(title);
    }

    // document author(s)
    final String author = metadata.getAuthor();
    if (author != null && author.length() > 0) {
        parserDoc.setAuthor(author);
    }

    // subject
    final String summary = metadata.getSubject();
    if (summary != null && summary.length() > 0) {
        parserDoc.setSummary(summary);
    }

    // keywords
    final String keywords = metadata.getKeywords();
    if (keywords != null && keywords.length() > 0) {
        // Split on runs of delimiters so that "a, b" does not produce an
        // empty keyword between the comma and the space.
        final java.util.List<String> keywordList = new java.util.ArrayList<String>();
        for (String keyword : keywords.split("[,;\\s]+")) {
            // A leading delimiter still yields one empty first token.
            if (keyword.length() > 0) {
                keywordList.add(keyword);
            }
        }
        if (!keywordList.isEmpty()) {
            parserDoc.setKeywords(keywordList);
        }
    }

    // last modification date
    final Calendar lastMod = metadata.getModificationDate();
    if (lastMod != null) {
        parserDoc.setLastChanged(lastMod.getTime());
    }
}

From source file:org.pdfmetamodifier.IOHelper.java

License:Apache License

/**
 * Save Metadata./*from  w  w w  .ja v  a 2s  .com*/
 * 
 * @param pdfFile
 *            Source PDF file.
 * @param metadataFile
 *            File with Metadata in user-frendly format.
 * @throws IOException
 */
/*
 * See:
 *      https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java?view=markup
 */
public static void saveMetadata(final File pdfFile, final File metadataFile) throws IOException {
    PDDocument document = null;
    try {
        // Read PDF file.
        document = PDDocument.load(pdfFile);
        if (document.isEncrypted()) {
            throw new IOException("Document is encrypted.");
        }

        // Get data from PDF file.
        final PDDocumentInformation information = document.getDocumentInformation();

        // Convert.
        final List<String> lines = MetadataHelper.metadataToLineList(information);

        // Write line list into the text file.
        Files.write(metadataFile.toPath(), lines);
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:org.pdfsam.pdf.DefaultPDFBoxLoader.java

License:Open Source License

/**
 * Populates the descriptor with the page count, version and document
 * information entries of the given PDF document.
 *
 * @param document the loaded PDF document to read from
 * @param descriptor the descriptor that receives the values
 */
public void accept(PDDocument document, PdfDocumentDescriptor descriptor) {
    // Structural properties first.
    descriptor.pages(document.getNumberOfPages());
    final String versionText = Float.toString(document.getVersion());
    descriptor.setVersion(getVersion(versionText));

    // Then the entries of the document information dictionary.
    final PDDocumentInformation docInfo = document.getDocumentInformation();
    descriptor.putInformation(PdfMetadataKey.TITLE.getKey(), docInfo.getTitle());
    descriptor.putInformation(PdfMetadataKey.AUTHOR.getKey(), docInfo.getAuthor());
    descriptor.putInformation(PdfMetadataKey.CREATOR.getKey(), docInfo.getCreator());
    descriptor.putInformation(PdfMetadataKey.SUBJECT.getKey(), docInfo.getSubject());
    descriptor.putInformation(PdfMetadataKey.KEYWORDS.getKey(), docInfo.getKeywords());
    descriptor.putInformation("Producer", docInfo.getProducer());

    // The formatted creation date is only stored when a creation date exists.
    Optional.ofNullable(docInfo.getCreationDate())
            .map(FORMATTER::format)
            .ifPresent(formattedDate -> descriptor.putInformation("FormattedCreationDate", formattedDate));
}

From source file:org.terrier.indexing.PDFDocument.java

License:Mozilla Public License

/** 
 * Returns a reader over the text extracted from the PDF document supplied
 * by the given input stream.
 * <p>
 * Files larger than 300Mb are skipped and yield an empty reader. Encrypted
 * documents are decrypted with the empty password. As a side effect the
 * "title" property is set, either from the PDF document information (when
 * USE_PDF_TITLE is enabled) or from the file name.
 *
 * @param is the input stream that represents the document's file.
 * @return Reader a reader that is fed to an indexer.
 * @throws RuntimeException if the document cannot be decrypted or its text
 *         cannot be extracted; the original exception is attached as cause.
 */
protected Reader getReader(InputStream is) {

    if ((Files.length(filename) / 1048576) > 300) {
        logger.info("Skipping document " + filename + " because it's size exceeds 300Mb");
        return new StringReader("");
    }

    PDDocument pdfDocument = null;
    Reader rtr = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        // A character that occurs more than half as often as the plain
        // space is treated as a space substitute and replaced by a space.
        int spaceCount = StringUtils.countMatches(contents, " ");
        for (char badChar : new char[] { '\u00A0', '\u2029', '#' }) {
            final int count = StringUtils.countMatches(contents, "" + badChar);
            if (count > spaceCount / 2) {
                contents = contents.replace(badChar, ' ');
                spaceCount += count;
            }
        }
        rtr = new StringReader(contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null && USE_PDF_TITLE) {
            // NOTE(review): info.getTitle() may be null for PDFs without a
            // title - confirm that setProperty accepts a null value.
            setProperty("title", info.getTitle());
        } else {
            setProperty("title", new java.io.File(super.filename).getName());
        }
    } catch (CryptographyException e) {
        // Keep the original exception as cause so the stack trace is not lost.
        throw new RuntimeException("Error decrypting PDF document: " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new RuntimeException("Error: The PDF document is encrypted and will not be indexed.", e);
    } catch (Exception e) {
        throw new RuntimeException("Error extracting PDF document", e);
    } finally {
        if (pdfDocument != null) {
            try {
                pdfDocument.close();
            } catch (IOException ignored) {
                // Best effort: a failure to close must not hide the
                // extracted text or the exception that is being thrown.
            }
        }
    }
    return rtr;
}

From source file:org.wandora.application.tools.extractors.files.SimpleDocumentExtractor.java

License:Open Source License

/**
 * Extracts text content and, where available, document metadata from the
 * resource identified by {@code locator} and attaches them to {@code topic}.
 * <p>
 * The handler is chosen by the (lower-cased) suffix of the locator: PDF,
 * RTF, MS Office, OpenDocument, HTML and plain text are treated as text
 * sources; anything else is probed for its MIME type and stored either as
 * text or as binary enrichment. Any exception is passed to {@code log}.
 *
 * @param locator     location of the document; also used to derive its name
 * @param inputStream stream with the document content (not used for PDFs,
 *                    which are loaded from the locator)
 * @param topicMap    topic map in which topics and associations are created
 * @param topic       topic that receives the extracted data
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap, Topic topic) {
    try {
        // The name is the part of the locator after the last path separator.
        String name = locator;
        if (name.indexOf("/") != -1) {
            name = name.substring(name.lastIndexOf("/") + 1);
        } else if (name.indexOf("\\") != -1) {
            name = name.substring(name.lastIndexOf("\\") + 1);
        }
        String lowerCaseLocator = locator.toLowerCase();

        // --- HANDLE PDF ENRICHMENT TEXT ---
        if (lowerCaseLocator.endsWith("pdf")) {
            String content;
            PDDocument doc = PDDocument.load(locator);
            // try/finally so the document is released even if extraction fails.
            try {
                PDDocumentInformation info = doc.getDocumentInformation();
                DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT);

                // --- PDF PRODUCER ---
                String producer = info.getProducer();
                if (producer != null && producer.length() > 0) {
                    Topic producerType = createTopic(topicMap, "pdf-producer");
                    setData(topic, producerType, defaultLang, producer.trim());
                }

                // --- PDF MODIFICATION DATE ---
                Calendar mCal = info.getModificationDate();
                if (mCal != null) {
                    String mdate = dateFormatter.format(mCal.getTime());
                    if (mdate != null && mdate.length() > 0) {
                        Topic modificationDateType = createTopic(topicMap, "pdf-modification-date");
                        setData(topic, modificationDateType, defaultLang, mdate.trim());
                    }
                }

                // --- PDF CREATOR ---
                String creator = info.getCreator();
                if (creator != null && creator.length() > 0) {
                    Topic creatorType = createTopic(topicMap, "pdf-creator");
                    setData(topic, creatorType, defaultLang, creator.trim());
                }

                // --- PDF CREATION DATE ---
                Calendar cCal = info.getCreationDate();
                if (cCal != null) {
                    String cdate = dateFormatter.format(cCal.getTime());
                    if (cdate != null && cdate.length() > 0) {
                        Topic creationDateType = createTopic(topicMap, "pdf-creation-date");
                        setData(topic, creationDateType, defaultLang, cdate.trim());
                    }
                }

                // --- PDF AUTHOR ---
                String author = info.getAuthor();
                if (author != null && author.length() > 0) {
                    Topic authorType = createTopic(topicMap, "pdf-author");
                    setData(topic, authorType, defaultLang, author.trim());
                }

                // --- PDF SUBJECT ---
                String subject = info.getSubject();
                if (subject != null && subject.length() > 0) {
                    Topic subjectType = createTopic(topicMap, "pdf-subject");
                    setData(topic, subjectType, defaultLang, subject.trim());
                }

                // --- PDF TITLE ---
                // Read the title entry; reading the subject here would store
                // the subject a second time under "pdf-title".
                String title = info.getTitle();
                if (title != null && title.length() > 0) {
                    Topic titleType = createTopic(topicMap, "pdf-title");
                    setData(topic, titleType, defaultLang, title.trim());
                }

                // --- PDF KEYWORDS (SEPARATED WITH SEMICOLON) ---
                String keywords = info.getKeywords();
                if (keywords != null && keywords.length() > 0) {
                    Topic keywordType = createTopic(topicMap, "pdf-keyword");
                    String[] keywordArray = keywords.split(";");
                    for (int i = 0; i < keywordArray.length; i++) {
                        String keyword = Textbox.trimExtraSpaces(keywordArray[i]);
                        if (keyword != null && keyword.length() > 0) {
                            Topic keywordTopic = createTopic(topicMap, keyword, keywordType);
                            createAssociation(topicMap, keywordType, new Topic[] { topic, keywordTopic });
                        }
                    }
                }

                // --- PDF TEXT CONTENT ---
                PDFTextStripper stripper = new PDFTextStripper();
                content = stripper.getText(doc);
            } finally {
                doc.close();
            }
            setTextEnrichment(topic, topicMap, content, name);
        }

        // --- HANDLE RTF DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("rtf")) {
            String content = Textbox.RTF2PlainText(inputStream);
            setTextEnrichment(topic, topicMap, content, name);
        }

        // --- HANDLE OFFICE DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("doc") || lowerCaseLocator.endsWith("docx")
                || lowerCaseLocator.endsWith("ppt") || lowerCaseLocator.endsWith("xls")
                || lowerCaseLocator.endsWith("vsd")) {
            String content = MSOfficeBox.getText(inputStream);
            if (content != null) {
                setTextEnrichment(topic, topicMap, content, name);
            }
        }

        // --- HANDLE OPENDOCUMENT FILES ---
        else if (lowerCaseLocator.endsWith("odt") || lowerCaseLocator.endsWith("odp")
                || lowerCaseLocator.endsWith("odg") || lowerCaseLocator.endsWith("ods")) {

            org.odftoolkit.simple.Document oodocument = org.odftoolkit.simple.Document
                    .loadDocument(inputStream);
            String content = OpenOfficeBox.getText(oodocument);
            setTextEnrichment(topic, topicMap, content, name);

            org.odftoolkit.simple.meta.Meta meta = oodocument.getOfficeMetadata();

            // --- OO KEYWORDS ---
            List<String> keywords = meta.getKeywords();
            if (keywords != null && !keywords.isEmpty()) {
                Topic keywordType = createTopic(topicMap, "oo-keyword");
                for (String keyword : keywords) {
                    // Check for null before trimming, not after.
                    if (keyword == null) {
                        continue;
                    }
                    String trimmedKeyword = keyword.trim();
                    if (trimmedKeyword.length() > 0) {
                        Topic keywordTopic = createTopic(topicMap, trimmedKeyword, keywordType);
                        createAssociation(topicMap, keywordType, new Topic[] { topic, keywordTopic });
                    }
                }
            }

            // --- OO TITLE ---
            String title = meta.getTitle();
            if (title != null && title.length() > 0) {
                Topic titleType = createTopic(topicMap, "oo-title");
                setData(topic, titleType, defaultLang, title.trim());
            }

            // --- OO SUBJECT ---
            String subject = meta.getSubject();
            if (subject != null && subject.length() > 0) {
                Topic subjectType = createTopic(topicMap, "oo-subject");
                setData(topic, subjectType, defaultLang, subject.trim());
            }

            // --- OO CREATOR ---
            String author = meta.getCreator();
            if (author != null && author.length() > 0) {
                Topic authorType = createTopic(topicMap, "oo-author");
                setData(topic, authorType, defaultLang, author.trim());
            }

            // --- OO CREATION DATE ---
            Calendar cCal = meta.getCreationDate();
            if (cCal != null) {
                DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT);
                String cdate = dateFormatter.format(cCal.getTime());
                if (cdate != null && cdate.length() > 0) {
                    Topic creationDateType = createTopic(topicMap, "oo-creation-date");
                    setData(topic, creationDateType, defaultLang, cdate.trim());
                }
            }

            // --- OO DESCRIPTION ---
            String description = meta.getDescription();
            if (description != null && description.length() > 0) {
                Topic descriptionType = createTopic(topicMap, "oo-description");
                setData(topic, descriptionType, defaultLang, description.trim());
            }

            // --- OO GENERATOR ---
            String generator = meta.getGenerator();
            if (generator != null && generator.length() > 0) {
                Topic generatorType = createTopic(topicMap, "oo-generator");
                setData(topic, generatorType, defaultLang, generator.trim());
            }
        }

        // --- HANDLE HTML DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("html") || lowerCaseLocator.endsWith("htm")) {
            String content = IObox.loadFile(new InputStreamReader(inputStream));
            setTextEnrichment(topic, topicMap, content, name);
        }

        // --- HANDLE TXT DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("txt") || lowerCaseLocator.endsWith("text")) {
            String content = IObox.loadFile(new InputStreamReader(inputStream));
            setTextEnrichment(topic, topicMap, content, name);
        }

        // --- HANDLE ANY OTHER DOCUMENTS ---
        else {
            byte[] content = IObox.loadBFile(inputStream);
            String mimeType = "";
            MimeUtil.registerMimeDetector("eu.medsea.mimeutil.detector.MagicMimeMimeDetector");
            // Candidate MIME types, collected from the locator and the content.
            Collection<MimeType> mimeTypes = new ArrayList<MimeType>();
            if (locator != null) {
                if (MimeTypes.getMimeType(locator) != null) {
                    mimeTypes.add(new MimeType(MimeTypes.getMimeType(locator)));
                }
                mimeTypes.addAll(MimeUtil.getMimeTypes(locator));
            }
            mimeTypes.addAll(MimeUtil.getMimeTypes(content));
            boolean isText = false;
            for (MimeType mime : mimeTypes) {
                if (MimeUtil.isTextMimeType(mime)) {
                    isText = true;
                    break;
                }
            }
            if (isText) {
                setTextEnrichment(topic, topicMap, new String(content), name);
            } else {
                // Binary data is stored with the first detected MIME type,
                // or with an empty type if none was detected.
                if (!mimeTypes.isEmpty()) {
                    MimeType mime = mimeTypes.iterator().next();
                    mimeType = mime.toString();
                }
                setBinaryEnrichment(topic, topicMap, content, mimeType);
            }
        }
    } catch (Exception e) {
        log(e);
    }
}

From source file:org.wandora.application.tools.extractors.files.SimplePDFExtractor.java

License:Open Source License

/**
 * Loads the PDF identified by {@code locator} and stores its document
 * information and text content as occurrences and associations of
 * {@code pdfTopic}.
 * <p>
 * Locators starting with "http://" are loaded as URLs, everything else as a
 * local file. When {@code makePageTopics} is set, one topic per page is
 * created instead of a single text occurrence. Exceptions are printed to the
 * standard error stream; the document is always closed.
 *
 * @param locator     URL or file path of the PDF document
 * @param inputStream not used by this method; the PDF is loaded from the locator
 * @param topicMap    topic map in which topics and associations are created
 * @param pdfTopic    topic that receives the extracted data
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap,
        Topic pdfTopic) {
    PDDocument doc = null;
    try {
        if (locator.startsWith("http://")) {
            doc = PDDocument.load(new URL(locator));
        } else {
            doc = PDDocument.load(new File(locator));
        }
        PDDocumentInformation info = doc.getDocumentInformation();
        DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT);

        // --- PDF PRODUCER ---
        String producer = info.getProducer();
        if (producer != null && producer.length() > 0) {
            Topic producerType = createTopic(topicMap, "pdf-producer");
            setData(pdfTopic, producerType, defaultLang, producer.trim());
        }

        // --- PDF MODIFICATION DATE ---
        Calendar mCal = info.getModificationDate();
        if (mCal != null) {
            String mdate = dateFormatter.format(mCal.getTime());
            if (mdate != null && mdate.length() > 0) {
                Topic modificationDateType = createTopic(topicMap, "pdf-modification-date");
                setData(pdfTopic, modificationDateType, defaultLang, mdate.trim());
            }
        }

        // --- PDF CREATOR ---
        String creator = info.getCreator();
        if (creator != null && creator.length() > 0) {
            Topic creatorType = createTopic(topicMap, "pdf-creator");
            setData(pdfTopic, creatorType, defaultLang, creator.trim());
        }

        // --- PDF CREATION DATE ---
        Calendar cCal = info.getCreationDate();
        if (cCal != null) {
            String cdate = dateFormatter.format(cCal.getTime());
            if (cdate != null && cdate.length() > 0) {
                Topic creationDateType = createTopic(topicMap, "pdf-creation-date");
                setData(pdfTopic, creationDateType, defaultLang, cdate.trim());
            }
        }

        // --- PDF AUTHOR ---
        String author = info.getAuthor();
        if (author != null && author.length() > 0) {
            Topic authorType = createTopic(topicMap, "pdf-author");
            setData(pdfTopic, authorType, defaultLang, author.trim());
        }

        // --- PDF SUBJECT ---
        String subject = info.getSubject();
        if (subject != null && subject.length() > 0) {
            Topic subjectType = createTopic(topicMap, "pdf-subject");
            setData(pdfTopic, subjectType, defaultLang, subject.trim());
        }

        // --- PDF TITLE ---
        // Read the title entry; reading the subject here would use the
        // subject as display name / "pdf-title" value.
        String title = info.getTitle();
        if (title != null && title.length() > 0) {
            if (makeVariantFromTitle) {
                pdfTopic.setDisplayName(defaultLang, title);
            } else {
                Topic titleType = createTopic(topicMap, "pdf-title");
                setData(pdfTopic, titleType, defaultLang, title.trim());
            }
        }

        // --- PDF KEYWORDS (SEPARATED WITH SEMICOLON) ---
        String keywords = info.getKeywords();
        if (keywords != null && keywords.length() > 0) {
            Topic keywordType = createTopic(topicMap, "pdf-keyword");
            String[] keywordArray = keywords.split(";");
            for (int i = 0; i < keywordArray.length; i++) {
                String keyword = Textbox.trimExtraSpaces(keywordArray[i]);
                if (keyword != null && keyword.length() > 0) {
                    Topic keywordTopic = createTopic(topicMap, keyword, keywordType);
                    createAssociation(topicMap, keywordType, new Topic[] { pdfTopic, keywordTopic });
                }
            }
        }

        // --- PDF TEXT CONTENT ---
        PDFTextStripper stripper = new PDFTextStripper();

        if (makePageTopics) {
            int pages = doc.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based: iterating 0..pages-1
            // would produce an empty "page 0" and never extract the last page.
            for (int pageNumber = 1; pageNumber <= pages; pageNumber++) {
                stripper.setStartPage(pageNumber);
                stripper.setEndPage(pageNumber);
                String pageContent = stripper.getText(doc);
                Topic pageType = createTopic(topicMap, "pdf-page");
                Topic pageTopic = createTopic(topicMap,
                        pdfTopic.getBaseName() + " (page " + pageNumber + ")", pageType);
                Topic orderType = createTopic(topicMap, "order");
                Topic orderTopic = createTopic(topicMap, pageNumber + ".", orderType);
                Topic contentType = createTopic(topicMap, "pdf-text");
                setData(pageTopic, contentType, defaultLang, pageContent.trim());
                createAssociation(topicMap, pageType, new Topic[] { pdfTopic, pageTopic, orderTopic });
            }
        } else {
            // Whole-document text is stored as a single occurrence.
            String content = stripper.getText(doc);
            if (content != null && content.length() > 0) {
                Topic contentType = createTopic(topicMap, "pdf-text");
                setData(pdfTopic, contentType, defaultLang, content.trim());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close on both the success and the failure path.
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception closeError) {
                // Report the close failure itself, not the earlier exception.
                closeError.printStackTrace();
            }
        }
    }
}

From source file:org.wandora.application.tools.extractors.fng.ExtractFNGTextEnrichment.java

License:Open Source License

/**
 * Extracts text (and, for PDFs, subject, title and keywords) from the
 * resource identified by {@code locator} and attaches it to
 * {@code textTopic}.
 * <p>
 * The handler is chosen by the lower-cased suffix of the locator: PDF, RTF
 * and MS Office documents have dedicated handlers; everything else is read
 * as plain text. Any exception is passed to {@code log}.
 *
 * @param locator     location of the document; PDFs are loaded from it as a URL
 * @param inputStream stream with the document content (used for non-PDF input)
 * @param topicMap    topic map in which topics are created
 * @param textTopic   topic that receives the extracted data
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap,
        Topic textTopic) {
    try {
        String lowerCaseLocator = locator.toLowerCase();

        // --- HANDLE PDF ENRICHMENT TEXT ---
        if (lowerCaseLocator.endsWith("pdf")) {

            String content;
            PDDocument doc = PDDocument.load(new URL(locator));
            // try/finally so the document is released even if extraction fails.
            try {
                PDDocumentInformation info = doc.getDocumentInformation();

                // --- PDF SUBJECT ---
                String subject = info.getSubject();
                if (subject != null && subject.length() > 0) {
                    Topic subjectType = createTopic(topicMap, "subject");
                    setData(textTopic, subjectType, defaultLang, subject.trim());
                }

                // --- PDF TITLE ---
                String title = info.getTitle();
                if (title != null && title.length() > 0) {
                    Topic titleType = createTopic(topicMap, "title");
                    setData(textTopic, titleType, defaultLang, title.trim());
                }

                // --- PDF KEYWORDS ---
                String keywords = info.getKeywords();
                if (keywords != null && keywords.length() > 0) {
                    Topic keywordType = createTopic(topicMap, "keywords");
                    setData(textTopic, keywordType, defaultLang, keywords.trim());
                }

                // --- PDF TEXT CONTENT ---
                PDFTextStripper stripper = new PDFTextStripper();
                content = stripper.getText(doc);
            } finally {
                doc.close();
            }
            setTextEnrichment(textTopic, topicMap, content);
        }

        // --- HANDLE RTF DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("rtf")) {
            String content = Textbox.RTF2PlainText(inputStream);
            setTextEnrichment(textTopic, topicMap, content);
        }

        // --- HANDLE OFFICE DOCUMENTS ---
        // "xls" (Excel); the former "xsl" sent spreadsheets to the plain-text
        // branch below and stylesheets to the Office extractor.
        else if (lowerCaseLocator.endsWith("doc") || lowerCaseLocator.endsWith("docx")
                || lowerCaseLocator.endsWith("ppt") || lowerCaseLocator.endsWith("xls")
                || lowerCaseLocator.endsWith("vsd")) {
            String content = MSOfficeBox.getText(inputStream);
            if (content != null) {
                setTextEnrichment(textTopic, topicMap, content);
            }
        }

        // --- HANDLE TXT DOCUMENTS ---
        else {
            String content = IObox.loadFile(new InputStreamReader(inputStream));
            setTextEnrichment(textTopic, topicMap, content);
        }
    } catch (Exception e) {
        log(e);
    }
}

From source file:org.wandora.piccolo.utils.crawler.handlers.PDFHandler.java

License:Open Source License

/**
 * Loads the PDF at the given URL, extracts its subject, title, keywords and
 * text content, and hands them to the crawler as a new document object.
 * <p>
 * Exceptions are printed to the standard error stream; the PDF document is
 * closed in every case.
 *
 * @param crawler crawler that receives the created document
 * @param in      not used by this method; the PDF is loaded from {@code page}
 * @param depth   not used by this method
 * @param page    URL of the PDF document
 */
public void handle(CrawlerAccess crawler, InputStream in, int depth, URL page) {
    try {
        String subject;
        String title;
        String keywords;
        String content;

        PDDocument doc = PDDocument.load(page);
        try {
            // Read everything that is needed while the document is still open.
            PDDocumentInformation info = doc.getDocumentInformation();
            subject = info.getSubject();
            title = info.getTitle();
            keywords = info.getKeywords();

            PDFTextStripper stripper = new PDFTextStripper();
            content = stripper.getText(doc);
        } finally {
            // Release the document even when text extraction fails.
            doc.close();
        }

        Document d = new Document();
        d.add(LuceneCrawler.subject(subject));
        d.add(LuceneCrawler.title(title));
        d.add(LuceneCrawler.keywords(keywords));
        d.add(LuceneCrawler.content(content));
        d.add(LuceneCrawler.location(page.toString()));

        crawler.addObject(d);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.wso2.carbon.apimgt.impl.reportgen.ReportGenerator.java

License:Open Source License

/**
 * Generate PDF file for API microgateway request summary
 *
 * @param table object containing table headers and row data
 * @return InputStream pdf as a stream/*from  w  w w  . j  a  v a 2s .  c  om*/
 * @throws IOException
 * @throws COSVisitorException
 */
public InputStream generateMGRequestSummeryPDF(TableData table) throws IOException, COSVisitorException {

    String[] columnHeaders = table.getColumnHeaders();

    PDDocument document = new PDDocument();
    PDPage page = new PDPage();
    page.setMediaBox(PDPage.PAGE_SIZE_A4);
    page.setRotation(0);
    document.addPage(page);

    PDPageContentStream contentStream = new PDPageContentStream(document, page, false, false);

    // add logo
    InputStream in = APIManagerComponent.class.getResourceAsStream("/report/wso2-logo.jpg");
    PDJpeg img = new PDJpeg(document, in);
    contentStream.drawImage(img, 375, 755);

    // Add topic
    contentStream.setFont(PDType1Font.HELVETICA_BOLD, 16);
    writeContent(contentStream, CELL_MARGIN, 770, "API Microgateway request summary");

    // Add generated time
    contentStream.setFont(PDType1Font.HELVETICA_BOLD, FONT_SIZE);
    writeContent(contentStream, CELL_MARGIN, 730, "Report generated on: " + new Date().toString());

    contentStream.setFont(TEXT_FONT, FONT_SIZE);

    // add table with data
    drowTableGrid(contentStream, table.getRows().size());
    writeRowsContent(contentStream, columnHeaders, table.getRows());

    // Add meta data
    // Whenever the summary report structure is updated this should be changed
    String requestCount = table.getRows().get(0).getEntries().get(2);
    document.getDocumentInformation().setCustomMetadataValue(MGW_META, getMetaCount(requestCount));

    contentStream.close();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    document.save(out);
    document.close();

    return new ByteArrayInputStream(out.toByteArray());

}

From source file:org.xstudiosys.pdfxmp.AddMetadataFromDocInfo.java

License:Apache License

/**
 * This will print the documents data./*from  w  ww.j  a  va2s . c om*/
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
    } else {
        PDDocument document = null;

        try {
            document = PDDocument.load(args[0]);
            if (document.isEncrypted()) {
                System.err.println("Error: Cannot add metadata to encrypted document.");
                System.exit(1);
            }
            PDDocumentCatalog catalog = document.getDocumentCatalog();
            PDDocumentInformation info = document.getDocumentInformation();

            XMPMetadata metadata = new XMPMetadata();

            XMPSchemaPDF pdfSchema = metadata.addPDFSchema();
            pdfSchema.setKeywords(info.getKeywords());
            pdfSchema.setProducer(info.getProducer());

            XMPSchemaBasic basicSchema = metadata.addBasicSchema();
            basicSchema.setModifyDate(info.getModificationDate());
            basicSchema.setCreateDate(info.getCreationDate());
            basicSchema.setCreatorTool(info.getCreator());
            basicSchema.setMetadataDate(new GregorianCalendar());

            XMPSchemaDublinCore dcSchema = metadata.addDublinCoreSchema();
            dcSchema.setTitle(info.getTitle());
            dcSchema.addCreator("PDFBox");
            dcSchema.setDescription(info.getSubject());

            PDMetadata metadataStream = new PDMetadata(document);
            metadataStream.importXMPMetadata(metadata);
            catalog.setMetadata(metadataStream);

            document.save(args[1]);
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}