Example usage for org.apache.pdfbox.pdmodel.PDDocument.close()

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.close().

Prototype

@Override
public void close() throws IOException

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:org.jahia.modules.docviewer.PDFBoxPDF2ImageConverterService.java

License:Open Source License

/**
 * Renders one page of a PDF document as an image.
 *
 * @param pdfInputStream stream the PDF document is loaded from
 * @param pageNumber index of the page in the document's page list (passed directly to {@code List.get})
 * @return the rendered page, or {@code null} if the document has no page at that index
 * @throws Exception if the document cannot be loaded or the page cannot be rendered
 */
public BufferedImage getImageOfPage(InputStream pdfInputStream, int pageNumber) throws Exception {
    BufferedImage image = null;

    PDDocument pdfDoc = null;
    try {
        pdfDoc = PDDocument.load(pdfInputStream);
        PDPage page = (PDPage) pdfDoc.getDocumentCatalog().getAllPages().get(pageNumber);
        image = page.convertToImage(imageType, resolution);
    } catch (IndexOutOfBoundsException e) {
        logger.warn("No page with the number {} found in the PDF document", pageNumber);
    } finally {
        // pdfDoc is still null when load() itself failed; only close what was actually opened.
        if (pdfDoc != null) {
            try {
                pdfDoc.close();
            } catch (Exception e) {
                // Closing is best-effort: a failure here must not mask the result or the primary exception.
                logger.debug("Unable to close the PDF document", e);
            }
        }
    }

    return image;
}

From source file:org.kimios.kernel.index.filters.PDFFilter.java

License:Open Source License

/**
 * Extracts the text content of a PDF read from the given stream.
 *
 * @param in stream providing the PDF data
 * @return the text returned by {@code PDFTextStripper.getText} for the parsed document
 * @throws IOException if parsing, text extraction or closing the document fails
 */
public String getBody(InputStream in) throws IOException {
    PDFParser parser = new PDFParser(in);
    parser.parse();
    COSDocument cosDoc = parser.getDocument();
    PDDocument pDDoc = new PDDocument(cosDoc);
    try {
        return new PDFTextStripper().getText(pDDoc);
    } finally {
        // Close even when text extraction throws, so the parsed document is always released.
        pDDoc.close();
    }
}

From source file:org.knime.ext.textprocessing.nodes.source.parser.pdf.PDFDocumentParser.java

License:Open Source License

/**
 * Parses a PDF from the given stream into a {@code Document}.
 *
 * The extracted text is added as a single section. The title is chosen in this order:
 * the document path (only if {@code m_filenameAsTitle} is set), the PDF title meta data,
 * the first sentence of the parsed text, and finally the document path again as fallback.
 * Each candidate is accepted only if {@code checkTitle} approves it.
 *
 * @param is stream providing the PDF data
 * @return the document created by the current document builder
 * @throws Exception if loading or processing the PDF fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    if (m_charset == null) {
        m_charset = Charset.defaultCharset();
    }

    // If loading fails there is nothing to close, so the try block starts after it.
    final PDDocument pdf = PDDocument.load(is);
    try {
        // text content
        final PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.setSortByPosition(true);
        m_currentDoc.addSection(textStripper.getText(pdf), SectionAnnotation.UNKNOWN);

        // meta data
        String title = m_filenameAsTitle ? m_docPath.toString().trim() : null;
        String authorNames = null;

        final PDDocumentInformation metaData = pdf.getDocumentInformation();
        if (metaData != null) {
            if (!checkTitle(title)) {
                title = metaData.getTitle();
            }
            authorNames = metaData.getAuthor();
        }

        // no usable title so far: fall back to the first sentence of the text
        if (!checkTitle(title)) {
            final List<Section> parsedSections = m_currentDoc.getSections();
            if (!parsedSections.isEmpty()) {
                try {
                    title = parsedSections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    LOGGER.debug("Parsed PDF document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }

        // still nothing usable: fall back to the file name
        if (!checkTitle(title)) {
            title = m_docPath.toString().trim();
        }
        m_currentDoc.addTitle(title);

        // authors from meta data
        if (authorNames != null) {
            for (Author author : AuthorUtil.parseAuthors(authorNames)) {
                m_currentDoc.addAuthor(author);
            }
        }

        return m_currentDoc.createDocument();
    } finally {
        pdf.close();
    }
}

From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 *
 * @param document The document to add the contents to.
 * @param is The stream to get the contents from.
 * @param documentLocation The location of the document, used just for debug messages.
 * @param stripper The text stripper to use; it is reset before use, or a new one is created when {@code null}.
 *
 * @throws IOException If there is an error parsing the document.
 */
private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper)
        throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        if (stripper == null) {
            stripper = new PDFTextStripper();
        } else {
            stripper.resetEngine();
        }
        stripper.writeText(pdfDocument, writer);

        String contents = writer.toString();
        // The extracted text is stored under both field names.
        addField(document, "contents", contents);

        addField(document, "stemmedcontents", contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addField(document, "Author", info.getAuthor());
            try {
                addField(document, "CreationDate", info.getCreationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Creator", info.getCreator());
            addField(document, "Keywords", info.getKeywords());
            try {
                addField(document, "ModificationDate", info.getModificationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Producer", info.getProducer());
            addField(document, "Subject", info.getSubject());
            addField(document, "Title", info.getTitle());
            addField(document, "Trapped", info.getTrapped());
        }
        // The summary is at most the first 500 characters of the extracted text.
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        addField(document, "summary", summary);
        addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages()));
    } catch (CryptographyException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Scans each page for a signature tag and applies the signature image
 * at the locations found.
 *
 * For every page of the original document a page is added to a second document: a new page
 * carrying the signature image(s) when a tag was found, otherwise the original page itself.
 * That second document is then overlaid onto the original, which is saved to the output stream.
 *
 * @param imageData raw bytes of the signature image
 * @param originalByteArrayOutputStream stream holding the original PDF; the result is written to it as well
 * @return the same stream instance that was passed in, after the signed document has been saved to it
 * @throws Exception if reading, modifying or saving the PDF fails
 */
@SuppressWarnings("unchecked")
protected ByteArrayOutputStream scanAndApplyAutographInEachPage(byte[] imageData,
        ByteArrayOutputStream originalByteArrayOutputStream) throws Exception {
    // NOTE(review): the result is saved into the same stream the input bytes were read from,
    // without resetting it first - confirm that appending to the existing content is intended.
    ByteArrayOutputStream outputStream = originalByteArrayOutputStream;
    byte[] pdfFileData = originalByteArrayOutputStream.toByteArray();
    PDDocument originalDocument = getPdfDocument(pdfFileData);
    PDDocument signatureDocument = null;
    try {
        signatureDocument = new PDDocument();
        List<PDPage> originalDocumentPages = originalDocument.getDocumentCatalog().getAllPages();
        for (PDPage page : originalDocumentPages) {
            List<String> signatureTags = new ArrayList<String>(getSignatureTagParameter());
            PersonSignatureLocationHelper printer = new PersonSignatureLocationHelper(signatureTags);
            PDStream contents = page.getContents();
            if (contents != null) {
                printer.processStream(page, page.findResources(), contents.getStream());
            }

            PDPage signaturePage;
            if (printer.isSignatureTagExists()) {
                signaturePage = new PDPage();
                PDJpeg signatureImage = new PDJpeg(signatureDocument, getBufferedImage(imageData));
                PDPageContentStream stream = new PDPageContentStream(signatureDocument, signaturePage, true, true);
                try {
                    for (PersonSignaturePrintHelper signatureHelper : printer.getPersonSignatureLocations()) {
                        float coordinateX = signatureHelper.getCoordinateX();
                        float coordinateY = signatureHelper.getCoordinateY() - signatureImage.getHeight()
                                - ADDITIONAL_SPACE_BETWEEN_TAG_AND_IMAGE;
                        stream.drawImage(signatureImage, coordinateX, coordinateY);
                    }
                } finally {
                    // Close once, after all locations are drawn: closing inside the loop would make
                    // the second drawImage fail and would never close the stream for an empty list.
                    stream.close();
                }
            } else {
                signaturePage = page;
            }
            signatureDocument.addPage(signaturePage);
        }

        Overlay overlay = new Overlay();
        overlay.overlay(signatureDocument, originalDocument);

        originalDocument.save(outputStream);
    } finally {
        // Release both documents even if processing failed; same close order as before.
        try {
            originalDocument.close();
        } finally {
            if (signatureDocument != null) {
                signatureDocument.close();
            }
        }
    }
    return outputStream;
}

From source file:org.kuali.coeus.common.impl.person.signature.PersonSignatureServiceImpl.java

License:Open Source License

/**
 * Loads a PDF from the given bytes and saves it again into a new in-memory stream.
 *
 * @param pdfFileData raw bytes of the PDF file
 * @return a new stream containing the document as saved by PDFBox
 * @throws Exception if the PDF cannot be loaded or saved
 */
private ByteArrayOutputStream getOriginalPdfDocumentAsOutputsStream(byte[] pdfFileData) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfFileData);
    PDDocument originalDocument = PDDocument.load(is);
    try {
        originalDocument.save(outputStream);
    } finally {
        // Close even when save() throws so the loaded document is always released.
        originalDocument.close();
    }
    return outputStream;
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Extracts the fonts of a PDF file into a directory, creating the directory if it is missing.
 *
 * @param extractPath directory the extracted fonts are written to
 * @param pdfFile the PDF file to read
 * @param format the font format to extract to
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
private static void extractPdfFonts(String extractPath, File pdfFile, FontFormat format) throws IOException {
    File fontExtractDir = new File(extractPath);
    if (!fontExtractDir.exists()) {
        // Best-effort: the result of mkdir() is not checked here.
        fontExtractDir.mkdir();
    }

    PDDocument pdf = PDDocument.load(pdfFile);
    try {
        PdfFontExtractor fontExtractor = new PdfFontExtractor();
        fontExtractor.setExtractFormat(format);
        fontExtractor.extractFontsToDir(pdf, extractPath);
    } finally {
        // Close even when extraction throws so the loaded document is always released.
        pdf.close();
    }
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Loads the given PDF file and extracts its fonts into a directory.
 *
 * @param pdf the PDF file to read
 * @param path directory the extracted fonts are written to
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
public void extractFontsToDir(File pdf, String path) throws IOException {
    PDDocument doc = PDDocument.load(pdf);
    try {
        extractFontsToDir(doc, path);
    } finally {
        // Close even when extraction throws so the loaded document is always released.
        doc.close();
    }
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Loads a PDF from the given bytes and extracts its fonts into a directory.
 *
 * @param pdf raw bytes of the PDF
 * @param path directory the extracted fonts are written to
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
public void extractFontsToDir(byte[] pdf, String path) throws IOException {
    PDDocument doc = PDDocument.load(pdf);
    try {
        extractFontsToDir(doc, path);
    } finally {
        // Close even when extraction throws so the loaded document is always released.
        doc.close();
    }
}

From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java

License:Open Source License

// Verifies that extracting fonts from the test PDF yields the expected number of fonts.
// NOTE(review): the method name mentions 2 fonts but the assertion expects 3 - confirm which is intended.
@Test
public void givenPdfWith2Fonts_extractFontsToFVFontList_thenListHasSameNumberOfFonts() throws IOException {
    PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"));
    try {
        PdfFontExtractor extractor = new PdfFontExtractor();

        List<FVFont> fonts = extractor.extractToFVFonts(doc);

        Assert.assertEquals(3, fonts.size());
    } finally {
        // Close even when extraction or the assertion fails so the document is not leaked.
        doc.close();
    }
}