Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:org.nuxeo.pdf.test.PDFMergeTest.java

License:Open Source License

/**
 * Transfers the merged PDF blob to a temporary file, loads it with PDFBox and
 * checks the page count and the marker text found on selected pages.
 *
 * @param inBlob the merged PDF to verify
 * @param jutsFirst2Pages when {@code true}, 5 pages are expected; otherwise 6
 *            pages are expected and page 6 is checked for the third marker too
 * @throws IOException if the temporary file or the PDF cannot be handled
 */
protected void checkMergedPDF(Blob inBlob, boolean jutsFirst2Pages) throws IOException {

    File mergedFile = File.createTempFile("testmergepdf", ".pdf");
    utils.track(mergedFile);
    inBlob.transferTo(mergedFile);

    PDDocument mergedDoc = PDDocument.load(mergedFile);
    assertNotNull(mergedDoc);
    utils.track(mergedDoc);

    // 2 + 3 + 1 (presumably the page counts of the merged source documents)
    int expectedPageCount = jutsFirst2Pages ? 5 : 6;
    assertEquals(expectedPageCount, mergedDoc.getNumberOfPages());

    // Page 1 must carry the first marker.
    String pageText = utils.extractText(mergedDoc, 1, 1);
    assertTrue(pageText.contains(MERGEPDF_CHECK_PREFIX + "1"));

    // Page 3 must carry the second marker.
    pageText = utils.extractText(mergedDoc, 3, 3);
    assertTrue(pageText.contains(MERGEPDF_CHECK_PREFIX + "2"));

    // Page 6 only exists (and is only checked) in the 6-page case.
    if (!jutsFirst2Pages) {
        pageText = utils.extractText(mergedDoc, 6, 6);
        assertTrue(pageText.contains(MERGEPDF_CHECK_PREFIX + "3"));
    }

    mergedDoc.close();
    utils.untrack(mergedDoc);

    mergedFile.delete();
    utils.untrack(mergedFile);

}

From source file:org.nuxeo.pdf.test.PDFPageExtractorTest.java

License:Open Source License

/**
 * Loads {@code pdfFile} and asserts that it contains 13 pages.
 *
 * @throws IOException if the PDF cannot be loaded or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    PDDocument sourceDoc = PDDocument.load(pdfFile);
    assertNotNull(sourceDoc);
    utils.track(sourceDoc);

    int pageCount = sourceDoc.getNumberOfPages();
    assertEquals(13, pageCount);

    sourceDoc.close();
    utils.untrack(sourceDoc);
}

From source file:org.nuxeo.pdf.test.PDFPageExtractorTest.java

License:Open Source License

/**
 * Loads the extracted PDF from the blob and checks its page count and that
 * the text of page 1 starts with the expected string.
 *
 * @param inBlob the extracted PDF to verify
 * @param inExpectedPageCount the number of pages the PDF must have
 * @param inExpectedTextAtPos0 the text expected at index 0 of page 1's text
 * @throws Exception if the blob stream or the PDF cannot be read
 */
protected void checkExtractedPdf(Blob inBlob, int inExpectedPageCount, String inExpectedTextAtPos0)
        throws Exception {

    PDDocument doc;
    // Close the blob's stream explicitly instead of leaking it.
    // NOTE(review): assumes the loaded document no longer reads from the
    // stream once load() has returned - confirm for the PDFBox version in use.
    try (java.io.InputStream pdfStream = inBlob.getStream()) {
        doc = PDDocument.load(pdfStream);
    }
    utils.track(doc);

    assertEquals(inExpectedPageCount, doc.getNumberOfPages());

    String txt = utils.extractText(doc, 1, 1);
    // indexOf == 0 means the page text starts with the expected string.
    assertEquals(0, txt.indexOf(inExpectedTextAtPos0));

    doc.close();
    utils.untrack(doc);
}

From source file:org.nuxeo.pdf.test.PDFPageExtractorTest.java

License:Open Source License

/**
 * Extracts pages 5 to 9 while setting title, subject and author, then checks
 * the resulting blob type, its file name and the document information stored
 * in the extracted PDF.
 */
@Test
public void testExtractPages_WithSetInfo() throws Exception {

    String originalName = pdfFileBlob.getFilename().replace(".pdf", "");
    PDFPageExtractor pe = new PDFPageExtractor(pdfFileBlob);

    Blob extracted = pe.extract(5, 9, null, "One Upon a Time", "Fairyland", "Cool Author");
    assertTrue(extracted instanceof FileBlob);
    assertEquals(originalName + "-5-9.pdf", extracted.getFilename());

    PDDocument doc;
    // Close the blob's stream explicitly instead of leaking it.
    // NOTE(review): assumes the loaded document no longer reads from the
    // stream once load() has returned - confirm for the PDFBox version in use.
    try (java.io.InputStream pdfStream = extracted.getStream()) {
        doc = PDDocument.load(pdfStream);
    }
    utils.track(doc);

    PDDocumentInformation docInfo = doc.getDocumentInformation();
    assertEquals("One Upon a Time", docInfo.getTitle());
    assertEquals("Fairyland", docInfo.getSubject());
    assertEquals("Cool Author", docInfo.getAuthor());

    doc.close();
    utils.untrack(doc);
}

From source file:org.nuxeo.pdf.test.PDFPageNumberingTest.java

License:Open Source License

/**
 * Loads {@code pdfFile}, asserts that it has 13 pages and that none of the
 * digits 0 to 9 appears anywhere in its extracted text.
 *
 * @throws IOException if the PDF cannot be loaded, read or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    PDDocument sourceDoc = PDDocument.load(pdfFile);
    assertNotNull(sourceDoc);
    utils.track(sourceDoc);

    assertEquals(13, sourceDoc.getNumberOfPages());

    String fullText = new PDFTextStripper().getText(sourceDoc);

    // The text must not contain any decimal digit.
    for (int digit = 0; digit <= 9; digit++) {
        assertEquals(-1, fullText.indexOf(String.valueOf(digit)));
    }

    sourceDoc.close();
    utils.untrack(sourceDoc);
}

From source file:org.nuxeo.pdf.test.PDFPageNumberingTest.java

License:Open Source License

/**
 * Loads the given PDF file and asserts that the text of one page contains
 * the expected number.
 *
 * @param inDoc the PDF file to load
 * @param inExpected the number whose decimal form must appear in the page text
 * @param inPageNumber the page whose text is extracted
 * @param inPosition a label used as the start of the assertion failure message
 * @throws IOException if the PDF cannot be loaded, read or closed
 */
protected void checkHasNumberInPage(File inDoc, int inExpected, int inPageNumber, String inPosition)
        throws IOException {

    PDDocument numberedDoc = PDDocument.load(inDoc);
    assertNotNull(numberedDoc);
    utils.track(numberedDoc);

    String pageText = utils.extractText(numberedDoc, inPageNumber, inPageNumber);
    boolean numberFound = pageText.indexOf(String.valueOf(inExpected)) > -1;
    String failureMessage = inPosition + ", expecting " + inExpected + " for page " + inPageNumber;
    assertTrue(failureMessage, numberFound);

    numberedDoc.close();
    utils.untrack(numberedDoc);
}

From source file:org.nuxeo.pdf.test.PDFTextExtractorTest.java

License:Open Source License

/**
 * Loads {@code pdfFile} and asserts that it contains 6 pages.
 *
 * @throws IOException if the PDF cannot be loaded or closed
 */
protected void checkPDFBeforeTest() throws IOException {

    PDDocument sourceDoc = PDDocument.load(pdfFile);
    assertNotNull(sourceDoc);
    utils.track(sourceDoc);

    int pageCount = sourceDoc.getNumberOfPages();
    assertEquals(6, pageCount);

    sourceDoc.close();
    utils.untrack(sourceDoc);
}

From source file:org.nuxeo.pdf.test.PDFUtilsTest.java

License:Open Source License

/**
 * Checks that {@code PDFUtils.saveInTempFile} returns a non-null blob whose
 * MIME type is {@code application/pdf}.
 */
@Test
public void test_saveInTempFile() throws Exception {

    PDDocument sourceDoc = PDDocument.load(pdfFile);
    utils.track(sourceDoc);

    FileBlob savedBlob = PDFUtils.saveInTempFile(sourceDoc);
    assertNotNull(savedBlob);
    String mimeType = savedBlob.getMimeType();
    assertEquals("application/pdf", mimeType);

    sourceDoc.close();
    utils.untrack(sourceDoc);
}

From source file:org.nuxeo.pdf.test.PDFUtilsTest.java

License:Open Source License

/**
 * Checks {@code PDFUtils.setInfos}: a call with null/empty values must leave
 * title, subject and author as they were, and a call with real values must
 * store them in the document information.
 */
@Test
public void test_setInfos() throws Exception {

    PDDocument pdfDoc = PDDocument.load(pdfFile);
    utils.track(pdfDoc);

    // Baseline: the fixture has a title but neither subject nor author.
    PDDocumentInformation infoBefore = pdfDoc.getDocumentInformation();
    assertEquals("Untitled 3", infoBefore.getTitle());
    assertNull(infoBefore.getSubject());
    assertNull(infoBefore.getAuthor());

    // Step 1: null/empty arguments are expected to change nothing.
    PDFUtils.setInfos(pdfDoc, null, "", null);
    PDDocumentInformation infoAfter = pdfDoc.getDocumentInformation();
    assertEquals(infoBefore.getTitle(), infoAfter.getTitle());
    assertEquals(infoBefore.getSubject(), infoAfter.getSubject());
    assertEquals(infoBefore.getAuthor(), infoAfter.getAuthor());

    // Step 2: real values must be stored.
    PDFUtils.setInfos(pdfDoc, "The Title", "The Subject", "The Author");
    infoAfter = pdfDoc.getDocumentInformation();
    assertEquals("The Title", infoAfter.getTitle());
    assertEquals("The Subject", infoAfter.getSubject());
    assertEquals("The Author", infoAfter.getAuthor());

    pdfDoc.close();
    utils.untrack(pdfDoc);
}

From source file:org.nuxeo.pdf.test.PDFWatermarkingTest.java

License:Open Source License

/**
 * Loads the PDF from the blob and asserts that the extracted text of every
 * page contains the watermark string.
 *
 * @param inBlob the watermarked PDF to verify
 * @param inWatermark the text that must be found on each page
 * @throws Exception if the blob stream or the PDF cannot be read
 */
protected void checkHasWatermarkOnAllPages(Blob inBlob, String inWatermark) throws Exception {

    PDDocument doc;
    // Close the blob's stream explicitly instead of leaking it.
    // NOTE(review): assumes the loaded document no longer reads from the
    // stream once load() has returned - confirm for the PDFBox version in use.
    try (java.io.InputStream pdfStream = inBlob.getStream()) {
        doc = PDDocument.load(pdfStream);
    }
    utils.track(doc);

    // Pages are addressed 1-based by utils.extractText in this loop.
    int count = doc.getNumberOfPages();
    for (int i = 1; i <= count; i++) {
        String txt = utils.extractText(doc, i, i);
        int pos = txt.indexOf(inWatermark);
        assertTrue("for page " + i + ", found pos is " + pos, pos > -1);
    }

    doc.close();
    utils.untrack(doc);
}