Example usage for org.apache.pdfbox.pdmodel PDPage hasContents

Introduction

On this page you can find example usage of org.apache.pdfbox.pdmodel.PDPage.hasContents().

Prototype

public boolean hasContents()

Source Link

Document

Returns true if this page has one or more content streams.

Usage

From source file: com.repeatability.pdf.PDFTextStripper.java

License: Apache License

/**
 * Resolves the start/end bookmark page numbers and then walks the page tree,
 * handing every page that has content to {@code processPage}.
 *
 * <p>The page counter {@code currentPageNo} is advanced for every page in the
 * tree, including pages without any content stream.
 *
 * @param pages The page tree of the document being processed.
 *
 * @throws IOException If a bookmark destination cannot be resolved or a page
 *                     cannot be processed.
 */
protected void processPages(PDPageTree pages) throws IOException {
    // Start bookmark: page number is the tree index plus one, or -1 when undefined.
    PDPage startPage = null;
    if (startBookmark != null) {
        startPage = startBookmark.findDestinationPage(document);
    }
    startBookmarkPageNumber = startPage == null ? -1 : pages.indexOf(startPage) + 1;

    // End bookmark: same convention as the start bookmark.
    PDPage endPage = null;
    if (endBookmark != null) {
        endPage = endBookmark.findDestinationPage(document);
    }
    endBookmarkPageNumber = endPage == null ? -1 : pages.indexOf(endPage) + 1;

    // Both bookmarks are set, yet neither one resolved to a page.
    boolean bothBookmarksUnresolved = startBookmark != null && endBookmark != null
            && startBookmarkPageNumber == -1 && endBookmarkPageNumber == -1;
    if (bothBookmarksUnresolved && startBookmark.getCOSObject() == endBookmark.getCOSObject()) {
        // Special case: start and end are the very same bookmark and it points
        // to nothing. Page 0 is used for both so that no text gets extracted.
        startBookmarkPageNumber = 0;
        endBookmarkPageNumber = 0;
    }

    for (PDPage currentPage : pages) {
        currentPageNo++;
        if (!currentPage.hasContents()) {
            // Nothing to extract from a page without content streams.
            continue;
        }
        processPage(currentPage);
    }
}