Example usage for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#getNumberOfPages()

Introduction

This page collects usage examples of the getNumberOfPages() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public int getNumberOfPages() 

Source Link

Document

This will return the total page count of the PDF document.

Usage

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Counts the pages of the PDF held in {@code content}.
 *
 * @return the page count reported by PDFBox, or 0 when {@code content} is null
 * @throws DocumentException wrapping any exception raised while loading or
 *         inspecting the document
 */
public int getPageCount() throws DocumentException {

    // No content yet: report an empty document instead of failing.
    if (content == null) {
        return 0;
    }

    ByteArrayInputStream input = null;
    PDDocument pdf = null;

    try {
        input = new ByteArrayInputStream(content);
        pdf = PDDocument.load(input);
        return pdf.getNumberOfPages();
    } catch (Exception cause) {
        throw new DocumentException(cause);
    } finally {
        // Release the parsed document first, then the stream backing it.
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException closeFailure) {
                // A failed close must not mask the result or the original error.
                closeFailure.printStackTrace();
            }
        }
        IOUtils.closeQuietly(input);
    }
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Appends the pages of {@code document} after the pages of this document and
 * replaces {@code content} with the serialized, merged PDF.
 *
 * <p>The two source documents are kept open until the merged document has been
 * saved, because the merged document reuses their page objects. All three
 * documents are released in the {@code finally} block so that nothing leaks
 * when loading, merging or saving fails.
 *
 * @param document the document whose pages are appended to this one
 * @throws DocumentException wrapping any exception raised while loading,
 *         merging or saving
 */
@Override
public void append(Document document) throws DocumentException {

    super.append(document);

    ByteArrayOutputStream baos = null;
    PDDocument mergedDocument = new PDDocument();
    PDDocument pDocument = null;
    PDDocument pNewDocument = null;
    // Set once the explicit close of the merged document has been attempted,
    // so the finally block does not close it a second time.
    boolean mergedCloseAttempted = false;

    try {

        baos = new ByteArrayOutputStream();

        // copy the pages of the current document
        ByteArrayInputStream bais = new ByteArrayInputStream(content);
        pDocument = PDDocument.load(bais);
        int pageCount = pDocument.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            mergedDocument.addPage((PDPage) pDocument.getDocumentCatalog().getAllPages().get(i));
        }

        // copy the pages of the new document
        ByteArrayInputStream baisNewDoc = new ByteArrayInputStream(document.getContent());
        pNewDocument = PDDocument.load(baisNewDoc);
        pageCount = pNewDocument.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            mergedDocument.addPage((PDPage) pNewDocument.getDocumentCatalog().getAllPages().get(i));
        }

        mergedDocument.save(baos);
        mergedCloseAttempted = true;
        mergedDocument.close();

        // replace content with new content
        content = baos.toByteArray();

    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        if (!mergedCloseAttempted) {
            closePdfQuietly(mergedDocument);
        }
        // Source documents are only closed here, i.e. after the merged
        // document has been written out (or the operation has failed).
        closePdfQuietly(pDocument);
        closePdfQuietly(pNewDocument);
        IOUtils.closeQuietly(baos);
    }

}

/** Closes a PDFBox document, reporting but not propagating a close failure. */
private static void closePdfQuietly(PDDocument pdf) {
    if (pdf == null) {
        return;
    }
    try {
        pdf.close();
    } catch (IOException e) {
        // Best-effort cleanup: a close failure must not mask the primary outcome.
        e.printStackTrace();
    }
}

From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java

License:Apache License

/**
 * Extracts text and metadata from a PDF stream and adds them as fields to the
 * given document via {@code addField}.
 *
 * <p>Fields added: {@code contents}, {@code stemmedcontents}, the document
 * information entries (when present), {@code summary} (at most the first 500
 * characters of the extracted text) and {@code numpages}.
 *
 * @param document The document to add the contents to.
 * @param is The stream to get the contents from.
 * @param documentLocation The location of the document, used just for error messages.
 * @param stripper The text stripper to use. When {@code null} a new
 *        {@link PDFTextStripper} is created; otherwise {@code resetEngine()} is
 *        called on it before it is used.
 *
 * @throws IOException If there is an error parsing the document, or if the
 *         document cannot be decrypted with the empty default password. In the
 *         latter case the underlying exception is attached as the cause.
 */
private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper)
        throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        if (stripper == null) {
            stripper = new PDFTextStripper();
        } else {
            stripper.resetEngine();
        }
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        // The extracted text is stored under two field names.
        addField(document, "contents", contents);

        addField(document, "stemmedcontents", contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addField(document, "Author", info.getAuthor());
            try {
                addField(document, "CreationDate", info.getCreationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Creator", info.getCreator());
            addField(document, "Keywords", info.getKeywords());
            try {
                addField(document, "ModificationDate", info.getModificationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Producer", info.getProducer());
            addField(document, "Subject", info.getSubject());
            addField(document, "Title", info.getTitle());
            addField(document, "Trapped", info.getTrapped());
        }

        // Summary: the leading part of the text, capped at 500 characters.
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        addField(document, "summary", summary);
        addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages()));
    } catch (CryptographyException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.mycore.media.MCRMediaPDFParser.java

License:Open Source License

/**
 * Parse file and store metadata in related Object.
 * /* www.jav a2  s . c  om*/
 * @return MCRMediaObject
 *              can be held any MCRMediaObject
 * @see MCRMediaObject#clone()
 */
@SuppressWarnings("unchecked")
public synchronized MCRMediaObject parse(File file) throws Exception {
    if (!file.exists())
        throw new IOException("File \"" + file.getName() + "\" doesn't exists!");

    MCRPDFObject media = new MCRPDFObject();

    LOGGER.info("parse " + file.getName() + "...");

    PDDocument pdf = PDDocument.load(file);
    try {
        media.fileName = file.getName();
        media.fileSize = file.length();
        media.folderName = (file.getAbsolutePath()).replace(file.getName(), "");

        PDPageTree pages = pdf.getDocumentCatalog().getPages();

        media.numPages = pdf.getNumberOfPages();

        PDPage page = (PDPage) pages.get(0);
        PDRectangle rect = page.getMediaBox();

        media.width = Math.round(rect.getWidth());
        media.height = Math.round(rect.getHeight());

        PDDocumentInformation info = pdf.getDocumentInformation();
        if (info != null) {
            media.tags = new MCRMediaTagObject();
            media.tags.author = info.getAuthor();
            media.tags.creator = info.getCreator();
            media.tags.producer = info.getProducer();
            media.tags.title = info.getTitle();
            media.tags.subject = info.getSubject();
            media.tags.keywords = info.getKeywords();
        }
    } catch (Exception e) {
        LOGGER.error(e.getMessage());
        throw new Exception(e.getMessage());
    } finally {
        pdf.close();
    }

    return media;
}

From source file:org.nines.NinesStatementHandlerTest.java

License:Apache License

/**
 * Loads {@code test_data/sample.pdf}, checks that it has two pages and that
 * text can be extracted from it.
 *
 * <p>Exceptions are propagated so that a missing or unreadable sample file
 * fails the test instead of being printed and ignored.
 *
 * @throws Exception if the sample file cannot be opened, parsed or stripped
 */
@Test
public void testPdfStrip() throws Exception {
    FileInputStream is = new FileInputStream(new File("test_data/sample.pdf"));
    try {
        PDDocument pdfDoc = PDDocument.load(is);
        try {
            assertEquals(2, pdfDoc.getNumberOfPages());
            PDFTextStripper pdfStrip = new PDFTextStripper();
            String text = pdfStrip.getText(pdfDoc);

            assertNotNull(text);
            System.out.println(text);
        } finally {
            // Always release the parsed document, even when an assertion fails.
            pdfDoc.close();
        }
    } finally {
        is.close();
    }
}

From source file:org.nuxeo.pdf.test.PDFMergeTest.java

License:Open Source License

/**
 * Verifies a merged PDF blob: writes it to a temporary file, loads it and
 * checks the page count and the marker text on selected pages.
 *
 * @param inBlob the merged PDF to verify
 * @param jutsFirst2Pages when true, 5 pages are expected and page 6 is not
 *        inspected; otherwise 6 pages are expected
 * @throws IOException if the temporary file or the PDF cannot be handled
 */
protected void checkMergedPDF(Blob inBlob, boolean jutsFirst2Pages) throws IOException {

    File mergedFile = File.createTempFile("testmergepdf", ".pdf");
    utils.track(mergedFile);
    inBlob.transferTo(mergedFile);

    PDDocument mergedPdf = PDDocument.load(mergedFile);
    assertNotNull(mergedPdf);
    utils.track(mergedPdf);

    // Full merge: 2 + 3 + 1 pages.
    int expectedPages = jutsFirst2Pages ? 5 : 6;
    assertEquals(expectedPages, mergedPdf.getNumberOfPages());

    String firstMarkerPage = utils.extractText(mergedPdf, 1, 1);
    assertTrue(firstMarkerPage.contains(MERGEPDF_CHECK_PREFIX + "1"));

    String secondMarkerPage = utils.extractText(mergedPdf, 3, 3);
    assertTrue(secondMarkerPage.contains(MERGEPDF_CHECK_PREFIX + "2"));

    if (!jutsFirst2Pages) {
        String thirdMarkerPage = utils.extractText(mergedPdf, 6, 6);
        assertTrue(thirdMarkerPage.contains(MERGEPDF_CHECK_PREFIX + "3"));
    }

    // Release and unregister the document before the file backing it.
    mergedPdf.close();
    utils.untrack(mergedPdf);

    mergedFile.delete();
    utils.untrack(mergedFile);

}

From source file:org.nuxeo.pdf.test.PDFPageExtractorTest.java

License:Open Source License

/**
 * Sanity check run before the tests: the PDF in {@code pdfFile} must load
 * and contain 13 pages.
 *
 * @throws IOException if the PDF cannot be loaded or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    final int expectedPageCount = 13;

    PDDocument sourcePdf = PDDocument.load(pdfFile);
    assertNotNull(sourcePdf);
    utils.track(sourcePdf);

    int actualPageCount = sourcePdf.getNumberOfPages();
    assertEquals(expectedPageCount, actualPageCount);

    sourcePdf.close();
    utils.untrack(sourcePdf);
}

From source file:org.nuxeo.pdf.test.PDFPageExtractorTest.java

License:Open Source License

/**
 * Verifies an extracted PDF blob: page count and the text its first page
 * starts with.
 *
 * @param inBlob the extracted PDF to verify
 * @param inExpectedPageCount the number of pages the PDF must have
 * @param inExpectedTextAtPos0 text that must be found at index 0 of the text
 *        extracted from page 1
 * @throws Exception if the blob cannot be read or the PDF cannot be processed
 */
protected void checkExtractedPdf(Blob inBlob, int inExpectedPageCount, String inExpectedTextAtPos0)
        throws Exception {

    PDDocument extractedPdf = PDDocument.load(inBlob.getStream());
    utils.track(extractedPdf);

    int actualPageCount = extractedPdf.getNumberOfPages();
    assertEquals(inExpectedPageCount, actualPageCount);

    // The expected text has to be the very first thing on page 1.
    String firstPageText = utils.extractText(extractedPdf, 1, 1);
    int expectedTextIndex = firstPageText.indexOf(inExpectedTextAtPos0);
    assertEquals(0, expectedTextIndex);

    extractedPdf.close();
    utils.untrack(extractedPdf);
}

From source file:org.nuxeo.pdf.test.PDFPageNumberingTest.java

License:Open Source License

/**
 * Sanity check run before the tests: the PDF in {@code pdfFile} must load,
 * contain 13 pages, and its extracted text must not contain any of the
 * digits 0 to 9.
 *
 * @throws IOException if the PDF cannot be loaded, stripped or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    PDDocument sourcePdf = PDDocument.load(pdfFile);
    assertNotNull(sourcePdf);
    utils.track(sourcePdf);

    assertEquals(13, sourcePdf.getNumberOfPages());

    String fullText = new PDFTextStripper().getText(sourcePdf);

    // None of the ten decimal digits may occur anywhere in the text.
    for (char digit = '0'; digit <= '9'; digit++) {
        assertEquals(-1, fullText.indexOf(digit));
    }

    sourcePdf.close();
    utils.untrack(sourcePdf);
}

From source file:org.nuxeo.pdf.test.PDFTextExtractorTest.java

License:Open Source License

/**
 * Sanity check run before the tests: the PDF in {@code pdfFile} must load
 * and contain 6 pages.
 *
 * @throws IOException if the PDF cannot be loaded or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    final int expectedPageCount = 6;

    PDDocument sourcePdf = PDDocument.load(pdfFile);
    assertNotNull(sourcePdf);
    utils.track(sourcePdf);

    int actualPageCount = sourcePdf.getNumberOfPages();
    assertEquals(expectedPageCount, actualPageCount);

    sourcePdf.close();
    utils.untrack(sourcePdf);
}