List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.close()
@Override public void close() throws IOException
From source file:org.jahia.modules.docviewer.PDFBoxPDF2ImageConverterService.java
License:Open Source License
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws Exception { BufferedImage image = null;// ww w. j av a2s .co m PDDocument pdfDoc = null; try { pdfDoc = PDDocument.load(pdfInputStream); PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber); image = page.convertToImage(imageType, resolution); } catch (IndexOutOfBoundsException e) { logger.warn("No page with the number {} found in the PDF document", pageNumber); } finally { try { pdfDoc.close(); } catch (Exception e) { // ignore } } return image; }
From source file:org.kimios.kernel.index.filters.PDFFilter.java
License:Open Source License
/**
 * Extracts the text of the PDF read from the given stream.
 *
 * @param in stream containing the PDF data
 * @return the text produced by {@code PDFTextStripper} for the whole document
 * @throws IOException if the stream cannot be parsed or the text cannot be extracted
 */
public String getBody(InputStream in) throws IOException {
    PDFParser parser = new PDFParser(in);
    parser.parse();
    COSDocument cosDoc = parser.getDocument();
    PDDocument pDDoc = new PDDocument(cosDoc);
    try {
        return new PDFTextStripper().getText(pDDoc);
    } finally {
        // Release the document even when text extraction fails.
        pDDoc.close();
    }
}
From source file:org.knime.ext.textprocessing.nodes.source.parser.pdf.PDFDocumentParser.java
License:Open Source License
private Document parseInternal(final InputStream is) throws Exception { m_currentDoc = new DocumentBuilder(m_tokenizerName); m_currentDoc.setDocumentFile(new File(m_docPath)); m_currentDoc.setDocumentType(m_type); m_currentDoc.addDocumentCategory(m_category); m_currentDoc.addDocumentSource(m_source); if (m_charset == null) { m_charset = Charset.defaultCharset(); }/* w ww. ja v a 2s . c o m*/ PDDocument document = null; try { document = PDDocument.load(is); // extract text from pdf PDFTextStripper stripper = new PDFTextStripper(); stripper.setSortByPosition(true); String text = stripper.getText(document); m_currentDoc.addSection(text, SectionAnnotation.UNKNOWN); // extract meta data from pdf String title = null; String authors = null; if (m_filenameAsTitle) { title = m_docPath.toString().trim(); } PDDocumentInformation information = document.getDocumentInformation(); if (information != null) { if (!checkTitle(title)) { title = information.getTitle(); } authors = information.getAuthor(); } // if title meta data does not exist use first sentence if (!checkTitle(title)) { List<Section> sections = m_currentDoc.getSections(); if (sections.size() > 0) { try { title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim(); } catch (IndexOutOfBoundsException e) { LOGGER.debug("Parsed PDF document " + m_docPath + " is empty."); title = ""; } } } // if no useful first sentence exist use filename if (!checkTitle(title)) { title = m_docPath.toString().trim(); } m_currentDoc.addTitle(title); // use author meta data if (authors != null) { Set<Author> authSet = AuthorUtil.parseAuthors(authors); for (Author a : authSet) { m_currentDoc.addAuthor(a); } } // add document to list return m_currentDoc.createDocument(); } finally { if (document != null) { document.close(); } } }
From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java
License:Apache License
/** * This will add the contents to the lucene document. * * @param document The document to add the contents to. * @param is The stream to get the contents from. * @param documentLocation The location of the document, used just for debug messages. * * @throws IOException If there is an error parsing the document. *///w w w.j ava2s . com private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper) throws IOException { PDDocument pdfDocument = null; try { pdfDocument = PDDocument.load(is); if (pdfDocument.isEncrypted()) { //Just try using the default password and move on pdfDocument.decrypt(""); } //create a writer where to append the text content. StringWriter writer = new StringWriter(); if (stripper == null) { stripper = new PDFTextStripper(); } else { stripper.resetEngine(); } stripper.writeText(pdfDocument, writer); // Note: the buffer to string operation is costless; // the char array value of the writer buffer and the content string // is shared as long as the buffer content is not modified, which will // not occur here. String contents = writer.getBuffer().toString(); // Add the tag-stripped contents as a Reader-valued Text field so it will // get tokenized and indexed. 
addField(document, "contents", contents); addField(document, "stemmedcontents", contents); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null) { addField(document, "Author", info.getAuthor()); try { addField(document, "CreationDate", info.getCreationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } addField(document, "Creator", info.getCreator()); addField(document, "Keywords", info.getKeywords()); try { addField(document, "ModificationDate", info.getModificationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } addField(document, "Producer", info.getProducer()); addField(document, "Subject", info.getSubject()); addField(document, "Title", info.getTitle()); addField(document, "Trapped", info.getTrapped()); } int summarySize = Math.min(contents.length(), 500); String summary = contents.substring(0, summarySize); // Add the summary as an UnIndexed field, so that it is stored and returned // with hit documents for display. addField(document, "summary", summary); addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages())); } catch (CryptographyException e) { throw new IOException("Error decrypting document(" + documentLocation + "): " + e); } catch (InvalidPasswordException e) { //they didn't suppply a password and the default of "" was wrong. throw new IOException( "Error: The document(" + documentLocation + ") is encrypted and will not be indexed."); } finally { if (pdfDocument != null) { pdfDocument.close(); } } }
From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java
License:Open Source License
/** * This method is to scan for signature tag in each page and apply the signature * at desired location.//from www. j a v a2s. c o m * @param imageData * @param originalByteArrayOutputStream */ @SuppressWarnings("unchecked") protected ByteArrayOutputStream scanAndApplyAutographInEachPage(byte[] imageData, ByteArrayOutputStream originalByteArrayOutputStream) throws Exception { ByteArrayOutputStream outputStream = originalByteArrayOutputStream; byte[] pdfFileData = originalByteArrayOutputStream.toByteArray(); PDDocument originalDocument = getPdfDocument(pdfFileData); //PDDocument.load(is); PDDocument signatureDocument = new PDDocument(); List<PDPage> originalDocumentPages = originalDocument.getDocumentCatalog().getAllPages(); for (PDPage page : originalDocumentPages) { List<String> signatureTags = new ArrayList<String>(getSignatureTagParameter()); PersonSignatureLocationHelper printer = new PersonSignatureLocationHelper(signatureTags); PDStream contents = page.getContents(); if (contents != null) { printer.processStream(page, page.findResources(), page.getContents().getStream()); } PDPage signaturePage = new PDPage(); if (printer.isSignatureTagExists()) { PDJpeg signatureImage = new PDJpeg(signatureDocument, getBufferedImage(imageData)); PDPageContentStream stream = new PDPageContentStream(signatureDocument, signaturePage, true, true); for (PersonSignaturePrintHelper signatureHelper : printer.getPersonSignatureLocations()) { float coordinateX = signatureHelper.getCoordinateX(); float coordinateY = signatureHelper.getCoordinateY() - signatureImage.getHeight() - ADDITIONAL_SPACE_BETWEEN_TAG_AND_IMAGE; stream.drawImage(signatureImage, coordinateX, coordinateY); stream.close(); } } else { signaturePage = page; } signatureDocument.addPage(signaturePage); } Overlay overlay = new Overlay(); overlay.overlay(signatureDocument, originalDocument); originalDocument.save(outputStream); originalDocument.close(); signatureDocument.close(); return outputStream; }
From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java
License:Open Source License
/**
 * Loads the given PDF bytes and saves the loaded document into a new in-memory stream.
 *
 * @param pdfFileData the PDF file content
 * @return a new stream containing the saved document
 * @throws Exception if the data cannot be loaded or the document cannot be saved
 */
private ByteArrayOutputStream getOriginalPdfDocumentAsOutputsStream(byte[] pdfFileData) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfFileData);
    PDDocument originalDocument = PDDocument.load(is);
    try {
        originalDocument.save(outputStream);
    } finally {
        // Close the document even when saving fails.
        originalDocument.close();
    }
    return outputStream;
}
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
private static void extractPdfFonts(String extractPath, File pdfFile, FontFormat format) throws IOException { File fontExtractDir = new File(extractPath); if (!fontExtractDir.exists()) fontExtractDir.mkdir();//from w w w . ja v a 2s. co m PDDocument pdf = PDDocument.load(pdfFile); PdfFontExtractor fontExtractor = new PdfFontExtractor(); fontExtractor.setExtractFormat(format); fontExtractor.extractFontsToDir(pdf, extractPath); pdf.close(); }
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
public void extractFontsToDir(File pdf, String path) throws IOException { PDDocument doc = PDDocument.load(pdf); extractFontsToDir(doc, path);//w w w.java2s . c o m doc.close(); }
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
public void extractFontsToDir(byte[] pdf, String path) throws IOException { PDDocument doc = PDDocument.load(pdf); extractFontsToDir(doc, path);// w ww . j av a 2 s .c o m doc.close(); }
From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java
License:Open Source License
/**
 * Extracts the fonts of the {@code pdf/brno30.pdf} test file and checks how many were found.
 *
 * NOTE(review): the method name says "2 fonts" while the assertion expects 3 - confirm which is intended.
 */
@Test
public void givenPdfWith2Fonts_extractFontsToFVFontList_thenListHasSameNumberOfFonts() throws IOException {
    PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"));
    try {
        PdfFontExtractor extractor = new PdfFontExtractor();
        List<FVFont> fonts = extractor.extractToFVFonts(doc);

        Assert.assertEquals(3, fonts.size());
    } finally {
        // Close the document even when extraction or the assertion fails.
        doc.close();
    }
}