Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

This page collects usage examples for the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.isisaddons.module.pdf.fixture.dom.templates.CustomerConfirmation.java

License:Apache License

/**
 * Loads the template pdf file and populates it with the order details.
 * <p>
 * The document is returned open. If populating the form fails, the document
 * is closed here before the exception propagates.
 *
 * @param order The order with the details for the pdf document
 * @return The populated PDF document
 * @throws Exception If the loading or the populating of the document fails for some reason
 */
private PDDocument loadAndPopulateTemplate(Order order) throws Exception {
    PDDocument pdfDocument = PDDocument.load(new ByteArrayInputStream(pdfAsBytes));
    boolean populated = false;
    try {
        PDAcroForm pdfForm = pdfDocument.getDocumentCatalog().getAcroForm();

        List<PDField> fields = pdfForm.getFields();
        SortedSet<OrderLine> orderLines = order.getOrderLines();
        for (PDField field : fields) {

            String fullyQualifiedName = field.getFullyQualifiedName();
            if ("orderDate".equals(fullyQualifiedName)) {
                field.setValue(order.getDate().toString());
            } else if ("orderNumber".equals(fullyQualifiedName)) {
                field.setValue(order.getNumber());
            } else if ("customerName".equals(fullyQualifiedName)) {
                field.setValue(order.getCustomerName());
            } else if ("message".equals(fullyQualifiedName)) {
                String message = "You have ordered '" + orderLines.size() + "' products";
                field.setValue(message);
            } else if ("preferences".equals(fullyQualifiedName)) {
                field.setValue(order.getPreferences());
            }
        }

        // Fill at most six order-line rows: i runs from 1 to 6.
        int i = 1;
        Iterator<OrderLine> orderLineIterator = orderLines.iterator();
        while (i < 7 && orderLineIterator.hasNext()) {
            OrderLine orderLine = orderLineIterator.next();

            String descriptionFieldName = "orderLine|" + i + "|desc";
            pdfForm.getField(descriptionFieldName).setValue(orderLine.getDescription());

            // NOTE(review): the cost and quantity fields below are filled with the
            // description, which looks like a copy-paste slip. Confirm the intended
            // OrderLine accessors before changing them.
            String costFieldName = "orderLine|" + i + "|cost";
            pdfForm.getField(costFieldName).setValue(orderLine.getDescription());

            String quantityFieldName = "orderLine|" + i + "|quantity";
            pdfForm.getField(quantityFieldName).setValue(orderLine.getDescription());
            i++;
        }

        populated = true;
        return pdfDocument;
    } finally {
        if (!populated) {
            // Populating failed: release the document instead of leaking it.
            try {
                pdfDocument.close();
            } catch (Exception ignored) {
                // keep the original failure as the exception seen by the caller
            }
        }
    }
}

From source file:org.jahia.modules.dm.thumbnails.impl.PDFBoxPDF2ImageConverterService.java

License:Open Source License

/**
 * Renders a single page of the supplied PDF stream as an image.
 *
 * @param pdfInputStream the stream to read the PDF document from
 * @param pageNumber the index of the page in the document's page list
 * @return the rendered image, or {@code null} if the document has no page at that index
 * @throws DocumentOperationException if an I/O error occurs while loading the document
 *         or rendering the page
 */
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber)
        throws DocumentOperationException {
    BufferedImage image = null;

    long timer = System.currentTimeMillis();

    PDDocument pdfDoc = null;
    try {
        pdfDoc = PDDocument.load(pdfInputStream);
        PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber);
        image = page.convertToImage(imageType, resolution);

        if (image != null && logger.isDebugEnabled()) {
            logger.debug("Generated an image for the page {} of the supplied input stream in {} ms", pageNumber,
                    (System.currentTimeMillis() - timer));
        }
    } catch (IndexOutOfBoundsException e) {
        logger.warn("No page with the number {} found in the PDF document", pageNumber);
    } catch (IOException e) {
        throw new DocumentOperationException("Error occurred trying to generate an image for the page "
                + pageNumber + " of the supplied input stream", e);
    } finally {
        // pdfDoc is still null when load() itself failed; guard explicitly
        // rather than relying on a swallowed NullPointerException.
        if (pdfDoc != null) {
            try {
                pdfDoc.close();
            } catch (Exception ignored) {
                // best-effort close: a failure here must not mask the result or the real error
            }
        }
    }

    return image;
}

From source file:org.jahia.modules.docviewer.PDFBoxPDF2ImageConverterService.java

License:Open Source License

/**
 * Renders a single page of the supplied PDF stream as an image.
 *
 * @param pdfInputStream the stream to read the PDF document from
 * @param pageNumber the index of the page in the document's page list
 * @return the rendered image, or {@code null} if the document has no page at that index
 * @throws Exception if loading the document or rendering the page fails
 */
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws Exception {
    BufferedImage image = null;

    PDDocument pdfDoc = null;
    try {
        pdfDoc = PDDocument.load(pdfInputStream);
        PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber);
        image = page.convertToImage(imageType, resolution);
    } catch (IndexOutOfBoundsException e) {
        logger.warn("No page with the number {} found in the PDF document", pageNumber);
    } finally {
        // pdfDoc is still null when load() itself failed; guard explicitly
        // rather than relying on a swallowed NullPointerException.
        if (pdfDoc != null) {
            try {
                pdfDoc.close();
            } catch (Exception ignored) {
                // best-effort close: a failure here must not mask the result or the real error
            }
        }
    }

    return image;
}

From source file:org.knime.ext.textprocessing.nodes.source.parser.pdf.PDFDocumentParser.java

License:Open Source License

/**
 * Builds a document from the PDF read off the given stream: the extracted text
 * becomes a single section, and title and authors are taken from the PDF
 * metadata with fallbacks to the first sentence and the document path.
 *
 * @param is the stream to read the PDF from
 * @return the document created by the current document builder
 * @throws Exception if loading the PDF or extracting its text fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    if (null == m_charset) {
        m_charset = Charset.defaultCharset();
    }

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(is);

        // The whole extracted text is added as one section of unknown kind.
        final PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.setSortByPosition(true);
        m_currentDoc.addSection(textStripper.getText(pdf), SectionAnnotation.UNKNOWN);

        // Title candidates, in order: file name (if configured), PDF metadata,
        // first sentence of the text, and finally the file name again.
        String title = m_filenameAsTitle ? m_docPath.toString().trim() : null;
        String authorText = null;

        final PDDocumentInformation metadata = pdf.getDocumentInformation();
        if (metadata != null) {
            if (!checkTitle(title)) {
                title = metadata.getTitle();
            }
            authorText = metadata.getAuthor();
        }

        if (!checkTitle(title)) {
            final List<Section> parsedSections = m_currentDoc.getSections();
            if (!parsedSections.isEmpty()) {
                try {
                    title = parsedSections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    LOGGER.debug("Parsed PDF document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }

        if (!checkTitle(title)) {
            title = m_docPath.toString().trim();
        }
        m_currentDoc.addTitle(title);

        // Authors come only from the PDF metadata.
        if (authorText != null) {
            for (final Author author : AuthorUtil.parseAuthors(authorText)) {
                m_currentDoc.addAuthor(author);
            }
        }

        return m_currentDoc.createDocument();
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 *
 * @param document The document to add the contents to.
 * @param is The stream to get the contents from.
 * @param documentLocation The location of the document, used just for debug messages.
 * @param stripper The text stripper to use. If {@code null} a new one is created,
 *        otherwise the supplied one is reset before use.
 *
 * @throws IOException If there is an error parsing the document, or if the document
 *         cannot be decrypted with the default (empty) password.
 */
private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper)
        throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        if (stripper == null) {
            stripper = new PDFTextStripper();
        } else {
            stripper.resetEngine();
        }
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        // Add the tag-stripped contents as a Reader-valued Text field so it will
        // get tokenized and indexed.
        addField(document, "contents", contents);

        addField(document, "stemmedcontents", contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addField(document, "Author", info.getAuthor());
            try {
                addField(document, "CreationDate", info.getCreationDate());
            } catch (IOException io) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Creator", info.getCreator());
            addField(document, "Keywords", info.getKeywords());
            try {
                addField(document, "ModificationDate", info.getModificationDate());
            } catch (IOException io) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Producer", info.getProducer());
            addField(document, "Subject", info.getSubject());
            addField(document, "Title", info.getTitle());
            addField(document, "Trapped", info.getTrapped());
        }
        // The summary is the first 500 characters of the text, or all of it if shorter.
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        // Add the summary as an UnIndexed field, so that it is stored and returned
        // with hit documents for display.
        addField(document, "summary", summary);
        addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages()));
    } catch (CryptographyException e) {
        // Chain the cause so the original stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Loads a PDF document from the given bytes. The document is returned open.
 *
 * @param pdfFileData the raw bytes of the PDF file
 * @return the loaded document
 * @throws Exception if the bytes cannot be loaded as a PDF document
 */
private PDDocument getPdfDocument(byte[] pdfFileData) throws Exception {
    return PDDocument.load(new ByteArrayInputStream(pdfFileData));
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Loads a PDF document from the given bytes and saves it into an in-memory stream.
 *
 * @param pdfFileData the raw bytes of the PDF file
 * @return the stream holding the saved document
 * @throws Exception if loading or saving the document fails
 */
private ByteArrayOutputStream getOriginalPdfDocumentAsOutputsStream(byte[] pdfFileData) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfFileData);
    PDDocument originalDocument = PDDocument.load(is);
    try {
        originalDocument.save(outputStream);
    } finally {
        // Close even when save() throws, so the document is not leaked.
        originalDocument.close();
    }
    return outputStream;
}

From source file:org.kuali.kra.printing.service.impl.PersonSignatureServiceImpl.java

License:Educational Community License

/**
 * This method is to remove interactive fields from the form.
 * @param pdfBytes/*  ww  w  .  j  a  v  a 2s.  c o  m*/
 * @return
 * @throws Exception
 */
protected ByteArrayOutputStream getFlattenedPdfForm(byte[] pdfBytes) throws Exception {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfBytes);
    PDDocument pdDoc = PDDocument.load(is);
    PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
    PDAcroForm acroForm = pdCatalog.getAcroForm();
    COSDictionary acroFormDict = acroForm.getDictionary();
    COSArray fields = (COSArray) acroFormDict.getDictionaryObject("Fields");
    fields.clear();
    pdDoc.save(byteArrayOutputStream);
    return byteArrayOutputStream;
}

From source file:org.lucee.extension.pdf.PDFStruct.java

License:Open Source License

/**
 * Loads this PDF as a PDFBox document, from the in-memory bytes if present,
 * otherwise from the resource. If a password is set, the document is decrypted with it.
 * The document is returned open.
 *
 * @return the loaded (and, if a password is set, decrypted) document
 * @throws CryptographyException if decrypting the document fails
 * @throws InvalidPasswordException if the configured password is wrong
 * @throws IOException if the document cannot be read
 */
public PDDocument toPDDocument() throws CryptographyException, InvalidPasswordException, IOException {
    PDDocument doc;
    if (barr != null) {
        doc = PDDocument.load(new ByteArrayInputStream(barr, 0, barr.length));
    } else if (resource instanceof File) {
        doc = PDDocument.load((File) resource);
    } else {
        // barr is null on this path, so the stream must be sized from the
        // bytes just read rather than from barr.length.
        doc = PDDocument.load(new ByteArrayInputStream(PDFUtil.toBytes(resource)));
    }

    if (password != null) {
        boolean decrypted = false;
        try {
            doc.decrypt(password);
            decrypted = true;
        } finally {
            if (!decrypted) {
                // Decryption failed: release the document instead of leaking it.
                try {
                    doc.close();
                } catch (IOException ignored) {
                    // keep the decryption failure as the exception seen by the caller
                }
            }
        }
    }

    return doc;
}

From source file:org.mabb.fontverter.opentype.DebugGlyphDrawer.java

License:Open Source License

/**
 * Debug test: extracts a Type0 font with CFF outlines from a sample PDF,
 * converts it to OpenType, writes the converted font to disk and draws one
 * of its glyphs.
 */
@Test
public void given_type0_withCFF_HelveticaNeueBug() throws Exception {
    PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf"));
    try {
        PDFont rawType0Font = extractFont(doc, "TCQDAA+HelveticaNeue-Light-Identity-H");
        OpenTypeFont font = (OpenTypeFont) PdfFontExtractor.convertType0FontToOpenType((PDType0Font) rawType0Font);
        TestUtils.saveTempFile(font.getData(), "TCQDAA+HelveticaNeue-Light-Identity-H.ttf");

        // NOTE(review): hard-coded absolute Windows path. This only works on the
        // original author's machine layout; consider a temp directory instead.
        FileUtils.writeByteArrayToFile(
                new File("C:/projects/Pdf2Dom/fontTest/TCQDAA+HelveticaNeue-Light-Identity-H.ttf"), font.getData());
        List<TtfGlyph> glyphs = font.getGlyfTable().getNonEmptyGlyphs();
        TtfGlyph glyph = glyphs.get(1);
        // The result is unused; the call is kept in case it is meant to surface
        // instruction-parsing errors (TODO confirm).
        glyph.getInstructions();

        DebugGlyphDrawer.drawGlyph(glyph);
    } finally {
        // Release the PDF even when extraction or conversion fails.
        doc.close();
    }
}