Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument load.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:org.isisaddons.module.pdf.fixture.dom.templates.CustomerConfirmation.java

License:Apache License

/**
 * Loads the template pdf file from the in-memory bytes and populates its form fields
 * with the order details.
 *
 * <p>At most the first six order lines are written, into the fields named
 * {@code orderLine|<n>|desc}, {@code orderLine|<n>|cost} and {@code orderLine|<n>|quantity}.
 * If anything fails after the template has been loaded, the document is closed before the
 * exception is propagated so that it does not leak.
 *
 * @param order The order with the details for the pdf document
 * @return The populated PDF document; it is returned open, so the caller has to close it
 * @throws Exception If the loading or the populating of the document fails for some reason
 */
private PDDocument loadAndPopulateTemplate(Order order) throws Exception {
    PDDocument pdfDocument = PDDocument.load(new ByteArrayInputStream(pdfAsBytes));

    boolean populated = false;
    try {
        PDAcroForm pdfForm = pdfDocument.getDocumentCatalog().getAcroForm();
        if (pdfForm == null) {
            // Fail with a descriptive message instead of a bare NullPointerException below.
            throw new IllegalStateException("The PDF template does not contain an AcroForm");
        }

        List<PDField> fields = pdfForm.getFields();
        SortedSet<OrderLine> orderLines = order.getOrderLines();
        for (PDField field : fields) {

            String fullyQualifiedName = field.getFullyQualifiedName();
            if ("orderDate".equals(fullyQualifiedName)) {
                field.setValue(order.getDate().toString());
            } else if ("orderNumber".equals(fullyQualifiedName)) {
                field.setValue(order.getNumber());
            } else if ("customerName".equals(fullyQualifiedName)) {
                field.setValue(order.getCustomerName());
            } else if ("message".equals(fullyQualifiedName)) {
                String message = "You have ordered '" + orderLines.size() + "' products";
                field.setValue(message);
            } else if ("preferences".equals(fullyQualifiedName)) {
                field.setValue(order.getPreferences());
            }
        }

        // Field numbering is 1-based; the loop stops after line 6 even if more order lines exist.
        int i = 1;
        Iterator<OrderLine> orderLineIterator = orderLines.iterator();
        while (i < 7 && orderLineIterator.hasNext()) {
            OrderLine orderLine = orderLineIterator.next();

            String descriptionFieldName = "orderLine|" + i + "|desc";
            pdfForm.getField(descriptionFieldName).setValue(orderLine.getDescription());

            // NOTE(review): the cost and quantity fields are filled with the description,
            // which looks like a copy/paste slip. The OrderLine accessors for cost and
            // quantity are not visible here, so the values are left as they were - confirm
            // and replace with the proper getters.
            String costFieldName = "orderLine|" + i + "|cost";
            pdfForm.getField(costFieldName).setValue(orderLine.getDescription());

            String quantityFieldName = "orderLine|" + i + "|quantity";
            pdfForm.getField(quantityFieldName).setValue(orderLine.getDescription());
            i++;
        }

        populated = true;
        return pdfDocument;
    } finally {
        if (!populated) {
            // Population failed: release the document, but never mask the original failure.
            try {
                pdfDocument.close();
            } catch (Exception ignored) {
                // best-effort cleanup; the exception that aborted population is the one to report
            }
        }
    }
}

From source file:org.jahia.modules.dm.thumbnails.impl.PDFBoxPDF2ImageConverterService.java

License:Open Source License

/**
 * Renders one page of the PDF read from the supplied stream as an image.
 *
 * @param pdfInputStream the stream to load the PDF document from
 * @param pageNumber the index of the page in the document's page list (passed straight to
 *        {@code List.get}, so it is zero-based)
 * @return the rendered image, or {@code null} if the document has no page at that index
 * @throws DocumentOperationException if an I/O error occurs while loading or rendering
 */
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber)
        throws DocumentOperationException {
    BufferedImage image = null;

    long timer = System.currentTimeMillis();

    PDDocument pdfDoc = null;
    try {
        pdfDoc = PDDocument.load(pdfInputStream);
        PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber);
        image = page.convertToImage(imageType, resolution);

        if (image != null && logger.isDebugEnabled()) {
            logger.debug("Generated an image for the page {} of the supplied input stream in {} ms", pageNumber,
                    (System.currentTimeMillis() - timer));
        }
    } catch (IndexOutOfBoundsException e) {
        logger.warn("No page with the number {} found in the PDF document", pageNumber);
    } catch (IOException e) {
        throw new DocumentOperationException("Error occurred trying to generate an image for the page "
                + pageNumber + " of the supplied input stream", e);
    } finally {
        // pdfDoc is still null when load() itself failed; check explicitly instead of
        // relying on a swallowed NullPointerException.
        if (pdfDoc != null) {
            try {
                pdfDoc.close();
            } catch (Exception ignored) {
                // closing is best effort; a failure here must not hide the rendering result
            }
        }
    }

    return image;
}

From source file:org.jahia.modules.docviewer.PDFBoxPDF2ImageConverterService.java

License:Open Source License

/**
 * Renders one page of the PDF read from the supplied stream as an image.
 *
 * @param pdfInputStream the stream to load the PDF document from
 * @param pageNumber the index of the page in the document's page list (passed straight to
 *        {@code List.get}, so it is zero-based)
 * @return the rendered image, or {@code null} if the document has no page at that index
 * @throws Exception if loading the document or rendering the page fails
 */
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws Exception {
    BufferedImage image = null;

    PDDocument pdfDoc = null;
    try {
        pdfDoc = PDDocument.load(pdfInputStream);
        PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber);
        image = page.convertToImage(imageType, resolution);
    } catch (IndexOutOfBoundsException e) {
        logger.warn("No page with the number {} found in the PDF document", pageNumber);
    } finally {
        // pdfDoc is still null when load() itself failed; check explicitly instead of
        // relying on a swallowed NullPointerException.
        if (pdfDoc != null) {
            try {
                pdfDoc.close();
            } catch (Exception ignored) {
                // closing is best effort; a failure here must not hide the real outcome
            }
        }
    }

    return image;
}

From source file:org.knime.ext.textprocessing.nodes.source.parser.pdf.PDFDocumentParser.java

License:Open Source License

/**
 * Reads a PDF from the given stream and turns it into a document: the extracted text becomes
 * one section, and title and authors are derived from the PDF meta data where available.
 *
 * <p>The title is chosen by the first candidate accepted by {@code checkTitle}: the document
 * path (only when file names are used as titles), the title from the PDF meta data, the first
 * sentence of the extracted text, and finally the document path again.
 *
 * @param is the stream to read the PDF from
 * @return the document created by the builder
 * @throws Exception if loading the PDF or extracting its text fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    // start a fresh builder and copy over the parser's settings
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    if (null == m_charset) {
        m_charset = Charset.defaultCharset();
    }

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(is);

        // the whole text of the pdf goes into a single section
        final PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.setSortByPosition(true);
        m_currentDoc.addSection(textStripper.getText(pdf), SectionAnnotation.UNKNOWN);

        // first title candidate: the file name, if configured
        String title = m_filenameAsTitle ? m_docPath.toString().trim() : null;
        String authorLine = null;

        // second title candidate and the authors come from the pdf meta data
        final PDDocumentInformation metadata = pdf.getDocumentInformation();
        if (metadata != null) {
            if (!checkTitle(title)) {
                title = metadata.getTitle();
            }
            authorLine = metadata.getAuthor();
        }

        // third title candidate: the first sentence of the extracted text
        if (!checkTitle(title)) {
            final List<Section> parsedSections = m_currentDoc.getSections();
            if (!parsedSections.isEmpty()) {
                try {
                    title = parsedSections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    LOGGER.debug("Parsed PDF document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }

        // last resort: the file name
        if (!checkTitle(title)) {
            title = m_docPath.toString().trim();
        }
        m_currentDoc.addTitle(title);

        // authors are only added when the meta data provided them
        if (authorLine != null) {
            for (Author author : AuthorUtil.parseAuthors(authorLine)) {
                m_currentDoc.addAuthor(author);
            }
        }

        return m_currentDoc.createDocument();
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 *
 * <p>The extracted text is added under "contents" and "stemmedcontents", its first 500
 * characters (or fewer) under "summary", the page count under "numpages", and the values of
 * the PDF document information, when present, under their own field names.
 *
 * @param document The document to add the contents to.
 * @param is The stream to get the contents from.
 * @param documentLocation The location of the document, used just for debug messages.
 * @param stripper The text stripper to reuse; its engine is reset before use. If it is
 *        {@code null} a new stripper is created.
 *
 * @throws IOException If there is an error parsing the document, or if it is encrypted and
 *         cannot be decrypted with the empty password. The underlying exception is kept as
 *         the cause.
 */
private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper)
        throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        if (stripper == null) {
            stripper = new PDFTextStripper();
        } else {
            stripper.resetEngine();
        }
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        // The same extracted text is stored under both field names.
        addField(document, "contents", contents);

        addField(document, "stemmedcontents", contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addField(document, "Author", info.getAuthor());
            try {
                addField(document, "CreationDate", info.getCreationDate());
            } catch (IOException io) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Creator", info.getCreator());
            addField(document, "Keywords", info.getKeywords());
            try {
                addField(document, "ModificationDate", info.getModificationDate());
            } catch (IOException io) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Producer", info.getProducer());
            addField(document, "Subject", info.getSubject());
            addField(document, "Title", info.getTitle());
            addField(document, "Trapped", info.getTrapped());
        }
        // The summary is the leading part of the text, capped at 500 characters.
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        addField(document, "summary", summary);
        addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages()));
    } catch (CryptographyException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Parses the given bytes as a PDF document.
 *
 * @param pdfFileData the raw bytes of the PDF
 * @return the loaded document; it is returned open
 * @throws Exception if the bytes cannot be loaded as a PDF
 */
private PDDocument getPdfDocument(byte[] pdfFileData) throws Exception {
    final InputStream pdfSource = new ByteArrayInputStream(pdfFileData);
    return PDDocument.load(pdfSource);
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Loads the given bytes as a PDF document and saves that document into an in-memory stream.
 *
 * @param pdfFileData the raw bytes of the PDF
 * @return the stream the loaded document was saved to
 * @throws Exception if loading or saving the document fails
 */
private ByteArrayOutputStream getOriginalPdfDocumentAsOutputsStream(byte[] pdfFileData) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfFileData);
    PDDocument originalDocument = PDDocument.load(is);
    try {
        originalDocument.save(outputStream);
    } finally {
        // Close even when save() throws, so the document is not leaked.
        originalDocument.close();
    }
    return outputStream;
}

From source file:org.kuali.kra.printing.service.impl.PersonSignatureServiceImpl.java

License:Educational Community License

/**
 * This method is to remove interactive fields from the form.
 * @param pdfBytes/*  ww  w  .  j  a  v  a 2s.  c o  m*/
 * @return
 * @throws Exception
 */
protected ByteArrayOutputStream getFlattenedPdfForm(byte[] pdfBytes) throws Exception {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfBytes);
    PDDocument pdDoc = PDDocument.load(is);
    PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
    PDAcroForm acroForm = pdCatalog.getAcroForm();
    COSDictionary acroFormDict = acroForm.getDictionary();
    COSArray fields = (COSArray) acroFormDict.getDictionaryObject("Fields");
    fields.clear();
    pdDoc.save(byteArrayOutputStream);
    return byteArrayOutputStream;
}

From source file:org.lucee.extension.pdf.PDFStruct.java

License:Open Source License

/**
 * Loads this PDF as a PDDocument and decrypts it when a password is set.
 *
 * <p>The source is chosen in this order: the in-memory byte array, the resource if it is a
 * {@link File}, otherwise the bytes read from the resource via {@code PDFUtil.toBytes}.
 * If decryption fails the loaded document is closed before the exception is propagated.
 *
 * @return the loaded (and, if a password is set, decrypted) document; it is returned open
 * @throws CryptographyException if decrypting the document fails
 * @throws InvalidPasswordException if the password does not match
 * @throws IOException if the document cannot be read
 */
public PDDocument toPDDocument() throws CryptographyException, InvalidPasswordException, IOException {
    PDDocument doc;
    if (barr != null) {
        doc = PDDocument.load(new ByteArrayInputStream(barr, 0, barr.length));
    } else if (resource instanceof File) {
        doc = PDDocument.load((File) resource);
    } else {
        // barr is null on this branch, so the length must come from the bytes just read;
        // using barr.length here always threw a NullPointerException.
        byte[] resourceBytes = PDFUtil.toBytes(resource);
        doc = PDDocument.load(new ByteArrayInputStream(resourceBytes, 0, resourceBytes.length));
    }

    if (password != null) {
        boolean decrypted = false;
        try {
            doc.decrypt(password);
            decrypted = true;
        } finally {
            if (!decrypted) {
                // Do not leak the document when decryption fails; keep the original exception.
                try {
                    doc.close();
                } catch (IOException ignored) {
                    // best-effort cleanup
                }
            }
        }
    }

    return doc;

}

From source file:org.mabb.fontverter.opentype.DebugGlyphDrawer.java

License:Open Source License

/**
 * Debug helper: extracts the HelveticaNeue Type0 font from the test PDF, converts it to
 * OpenType, writes the result to disk and draws the second non-empty glyph.
 *
 * @throws Exception if loading the PDF, converting the font or writing the files fails
 */
@Test
public void given_type0_withCFF_HelveticaNeueBug() throws Exception {
    PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf"));
    try {
        PDFont rawType0Font = extractFont(doc, "TCQDAA+HelveticaNeue-Light-Identity-H");
        OpenTypeFont font = (OpenTypeFont) PdfFontExtractor.convertType0FontToOpenType((PDType0Font) rawType0Font);
        TestUtils.saveTempFile(font.getData(), "TCQDAA+HelveticaNeue-Light-Identity-H.ttf");

        // NOTE(review): hard-coded absolute Windows path; this only works on a machine where
        // that location is writable. Consider relying on the temp file written above.
        FileUtils.writeByteArrayToFile(
                new File("C:/projects/Pdf2Dom/fontTest/TCQDAA+HelveticaNeue-Light-Identity-H.ttf"), font.getData());
        List<TtfGlyph> glyphs = font.getGlyfTable().getNonEmptyGlyphs();
        TtfGlyph glyph = glyphs.get(1);
        // The result is not used below; presumably kept for inspection in a debugger.
        List<TtfInstructionParser.TtfInstruction> instructions = glyph.getInstructions();

        DebugGlyphDrawer.drawGlyph(glyph);
    } finally {
        // Release the loaded PDF even when conversion or drawing fails.
        doc.close();
    }
}