List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:org.isisaddons.module.pdf.fixture.dom.templates.CustomerConfirmation.java
License:Apache License
/**
 * Loads the template pdf file and populates it with the order details.
 *
 * <p>The header fields (order date, number, customer name, message, preferences) are filled by
 * matching each form field's fully qualified name; at most six order lines are written to the
 * {@code orderLine|<n>|...} fields.
 *
 * @param order The order with the details for the pdf document
 * @return The populated PDF document; it is returned open, so the caller has to close it
 * @throws Exception If the loading or the populating of the document fails for some reason
 */
private PDDocument loadAndPopulateTemplate(Order order) throws Exception {
    PDDocument pdfDocument = PDDocument.load(new ByteArrayInputStream(pdfAsBytes));
    try {
        PDAcroForm pdfForm = pdfDocument.getDocumentCatalog().getAcroForm();
        List<PDField> fields = pdfForm.getFields();
        SortedSet<OrderLine> orderLines = order.getOrderLines();

        for (PDField field : fields) {
            String fullyQualifiedName = field.getFullyQualifiedName();
            if ("orderDate".equals(fullyQualifiedName)) {
                field.setValue(order.getDate().toString());
            } else if ("orderNumber".equals(fullyQualifiedName)) {
                field.setValue(order.getNumber());
            } else if ("customerName".equals(fullyQualifiedName)) {
                field.setValue(order.getCustomerName());
            } else if ("message".equals(fullyQualifiedName)) {
                String message = "You have ordered '" + orderLines.size() + "' products";
                field.setValue(message);
            } else if ("preferences".equals(fullyQualifiedName)) {
                field.setValue(order.getPreferences());
            }
        }

        // Only the first six order lines are written (i runs from 1 to 6).
        int i = 1;
        Iterator<OrderLine> orderLineIterator = orderLines.iterator();
        while (i < 7 && orderLineIterator.hasNext()) {
            OrderLine orderLine = orderLineIterator.next();

            String descriptionFieldName = "orderLine|" + i + "|desc";
            pdfForm.getField(descriptionFieldName).setValue(orderLine.getDescription());

            // NOTE(review): the cost and quantity fields are filled with the description, which
            // looks like a copy/paste slip. The OrderLine accessors for cost and quantity are not
            // visible here, so the original calls are kept - TODO confirm and fix.
            String costFieldName = "orderLine|" + i + "|cost";
            pdfForm.getField(costFieldName).setValue(orderLine.getDescription());

            String quantityFieldName = "orderLine|" + i + "|quantity";
            pdfForm.getField(quantityFieldName).setValue(orderLine.getDescription());

            i++;
        }
        return pdfDocument;
    } catch (Exception e) {
        // The document never reaches the caller on failure, so release it here.
        try {
            pdfDocument.close();
        } catch (Exception ignored) {
            // the original failure is the one worth reporting
        }
        throw e;
    }
}
From source file:org.jahia.modules.dm.thumbnails.impl.PDFBoxPDF2ImageConverterService.java
License:Open Source License
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws DocumentOperationException { BufferedImage image = null;//from www. java 2s .c om long timer = System.currentTimeMillis(); PDDocument pdfDoc = null; try { pdfDoc = PDDocument.load(pdfInputStream); PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber); image = page.convertToImage(imageType, resolution); if (image != null && logger.isDebugEnabled()) { logger.debug("Generated an image for the page {} of the supplied input stream in {} ms", pageNumber, (System.currentTimeMillis() - timer)); } } catch (IndexOutOfBoundsException e) { logger.warn("No page with the number {} found in the PDF document", pageNumber); } catch (IOException e) { throw new DocumentOperationException("Error occurred trying to generate an image for the page " + pageNumber + " of the supplied input stream", e); } finally { try { pdfDoc.close(); } catch (Exception e) { // ignore } } return image; }
From source file:org.jahia.modules.docviewer.PDFBoxPDF2ImageConverterService.java
License:Open Source License
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws Exception { BufferedImage image = null;/*from www.j a v a 2s. c o m*/ PDDocument pdfDoc = null; try { pdfDoc = PDDocument.load(pdfInputStream); PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber); image = page.convertToImage(imageType, resolution); } catch (IndexOutOfBoundsException e) { logger.warn("No page with the number {} found in the PDF document", pageNumber); } finally { try { pdfDoc.close(); } catch (Exception e) { // ignore } } return image; }
From source file:org.knime.ext.textprocessing.nodes.source.parser.pdf.PDFDocumentParser.java
License:Open Source License
private Document parseInternal(final InputStream is) throws Exception { m_currentDoc = new DocumentBuilder(m_tokenizerName); m_currentDoc.setDocumentFile(new File(m_docPath)); m_currentDoc.setDocumentType(m_type); m_currentDoc.addDocumentCategory(m_category); m_currentDoc.addDocumentSource(m_source); if (m_charset == null) { m_charset = Charset.defaultCharset(); }/*from w w w . ja v a2 s . c o m*/ PDDocument document = null; try { document = PDDocument.load(is); // extract text from pdf PDFTextStripper stripper = new PDFTextStripper(); stripper.setSortByPosition(true); String text = stripper.getText(document); m_currentDoc.addSection(text, SectionAnnotation.UNKNOWN); // extract meta data from pdf String title = null; String authors = null; if (m_filenameAsTitle) { title = m_docPath.toString().trim(); } PDDocumentInformation information = document.getDocumentInformation(); if (information != null) { if (!checkTitle(title)) { title = information.getTitle(); } authors = information.getAuthor(); } // if title meta data does not exist use first sentence if (!checkTitle(title)) { List<Section> sections = m_currentDoc.getSections(); if (sections.size() > 0) { try { title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim(); } catch (IndexOutOfBoundsException e) { LOGGER.debug("Parsed PDF document " + m_docPath + " is empty."); title = ""; } } } // if no useful first sentence exist use filename if (!checkTitle(title)) { title = m_docPath.toString().trim(); } m_currentDoc.addTitle(title); // use author meta data if (authors != null) { Set<Author> authSet = AuthorUtil.parseAuthors(authors); for (Author a : authSet) { m_currentDoc.addAuthor(a); } } // add document to list return m_currentDoc.createDocument(); } finally { if (document != null) { document.close(); } } }
From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java
License:Apache License
/** * This will add the contents to the lucene document. * * @param document The document to add the contents to. * @param is The stream to get the contents from. * @param documentLocation The location of the document, used just for debug messages. * * @throws IOException If there is an error parsing the document. *///from w w w. j av a 2 s . c om private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper) throws IOException { PDDocument pdfDocument = null; try { pdfDocument = PDDocument.load(is); if (pdfDocument.isEncrypted()) { //Just try using the default password and move on pdfDocument.decrypt(""); } //create a writer where to append the text content. StringWriter writer = new StringWriter(); if (stripper == null) { stripper = new PDFTextStripper(); } else { stripper.resetEngine(); } stripper.writeText(pdfDocument, writer); // Note: the buffer to string operation is costless; // the char array value of the writer buffer and the content string // is shared as long as the buffer content is not modified, which will // not occur here. String contents = writer.getBuffer().toString(); // Add the tag-stripped contents as a Reader-valued Text field so it will // get tokenized and indexed. 
addField(document, "contents", contents); addField(document, "stemmedcontents", contents); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null) { addField(document, "Author", info.getAuthor()); try { addField(document, "CreationDate", info.getCreationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } addField(document, "Creator", info.getCreator()); addField(document, "Keywords", info.getKeywords()); try { addField(document, "ModificationDate", info.getModificationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } addField(document, "Producer", info.getProducer()); addField(document, "Subject", info.getSubject()); addField(document, "Title", info.getTitle()); addField(document, "Trapped", info.getTrapped()); } int summarySize = Math.min(contents.length(), 500); String summary = contents.substring(0, summarySize); // Add the summary as an UnIndexed field, so that it is stored and returned // with hit documents for display. addField(document, "summary", summary); addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages())); } catch (CryptographyException e) { throw new IOException("Error decrypting document(" + documentLocation + "): " + e); } catch (InvalidPasswordException e) { //they didn't suppply a password and the default of "" was wrong. throw new IOException( "Error: The document(" + documentLocation + ") is encrypted and will not be indexed."); } finally { if (pdfDocument != null) { pdfDocument.close(); } } }
From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java
License:Open Source License
/**
 * Loads a PDF document from its raw bytes.
 *
 * @param pdfFileData the bytes of the PDF file
 * @return the loaded document
 * @throws Exception if the document cannot be loaded
 */
private PDDocument getPdfDocument(byte[] pdfFileData) throws Exception {
    return PDDocument.load(new ByteArrayInputStream(pdfFileData));
}
From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java
License:Open Source License
/**
 * Loads the PDF from its raw bytes and saves it into an in-memory output stream.
 *
 * @param pdfFileData the bytes of the PDF file
 * @return the stream holding the saved document
 * @throws Exception if the document cannot be loaded or saved
 */
private ByteArrayOutputStream getOriginalPdfDocumentAsOutputsStream(byte[] pdfFileData) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PDDocument originalDocument = PDDocument.load(new ByteArrayInputStream(pdfFileData));
    try {
        originalDocument.save(outputStream);
    } finally {
        // close even when save() fails, so the loaded document is not leaked
        originalDocument.close();
    }
    return outputStream;
}
From source file:org.kuali.kra.printing.service.impl.PersonSignatureServiceImpl.java
License:Educational Community License
/** * This method is to remove interactive fields from the form. * @param pdfBytes/* ww w . j a v a 2s. c o m*/ * @return * @throws Exception */ protected ByteArrayOutputStream getFlattenedPdfForm(byte[] pdfBytes) throws Exception { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); InputStream is = new ByteArrayInputStream(pdfBytes); PDDocument pdDoc = PDDocument.load(is); PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog(); PDAcroForm acroForm = pdCatalog.getAcroForm(); COSDictionary acroFormDict = acroForm.getDictionary(); COSArray fields = (COSArray) acroFormDict.getDictionaryObject("Fields"); fields.clear(); pdDoc.save(byteArrayOutputStream); return byteArrayOutputStream; }
From source file:org.lucee.extension.pdf.PDFStruct.java
License:Open Source License
public PDDocument toPDDocument() throws CryptographyException, InvalidPasswordException, IOException { PDDocument doc;// w w w. j av a2 s . co m if (barr != null) doc = PDDocument.load(new ByteArrayInputStream(barr, 0, barr.length)); else if (resource instanceof File) doc = PDDocument.load((File) resource); else doc = PDDocument.load(new ByteArrayInputStream(PDFUtil.toBytes(resource), 0, barr.length)); if (password != null) doc.decrypt(password); return doc; }
From source file:org.mabb.fontverter.opentype.DebugGlyphDrawer.java
License:Open Source License
@Test public void given_type0_withCFF_HelveticaNeueBug() throws Exception { PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf")); PDFont rawType0Font = extractFont(doc, "TCQDAA+HelveticaNeue-Light-Identity-H"); OpenTypeFont font = (OpenTypeFont) PdfFontExtractor.convertType0FontToOpenType((PDType0Font) rawType0Font); TestUtils.saveTempFile(font.getData(), "TCQDAA+HelveticaNeue-Light-Identity-H.ttf"); FileUtils.writeByteArrayToFile(//from w w w . j a va 2 s . c om new File("C:/projects/Pdf2Dom/fontTest/TCQDAA+HelveticaNeue-Light-Identity-H.ttf"), font.getData()); List<TtfGlyph> glyphs = font.getGlyfTable().getNonEmptyGlyphs(); TtfGlyph glyph = glyphs.get(1); List<TtfInstructionParser.TtfInstruction> instructions = glyph.getInstructions(); DebugGlyphDrawer.drawGlyph(glyph); }