List of usage examples for org.apache.pdfbox.pdmodel.PDPage#hasContents
public boolean hasContents()
From source file:com.repeatability.pdf.PDFTextStripper.java
License:Apache License
/** * This will process all of the pages and the text that is in them. * * @param pages The pages object in the document. * * @throws IOException If there is an error parsing the text. *///from w w w . ja va2 s. c o m protected void processPages(PDPageTree pages) throws IOException { PDPage startBookmarkPage = startBookmark == null ? null : startBookmark.findDestinationPage(document); if (startBookmarkPage != null) { startBookmarkPageNumber = pages.indexOf(startBookmarkPage) + 1; } else { // -1 = undefined startBookmarkPageNumber = -1; } PDPage endBookmarkPage = endBookmark == null ? null : endBookmark.findDestinationPage(document); if (endBookmarkPage != null) { endBookmarkPageNumber = pages.indexOf(endBookmarkPage) + 1; } else { // -1 = undefined endBookmarkPageNumber = -1; } if (startBookmarkPageNumber == -1 && startBookmark != null && endBookmarkPageNumber == -1 && endBookmark != null && startBookmark.getCOSObject() == endBookmark.getCOSObject()) { // this is a special case where both the start and end bookmark // are the same but point to nothing. In this case // we will not extract any text. startBookmarkPageNumber = 0; endBookmarkPageNumber = 0; } for (PDPage page : pages) { currentPageNo++; if (page.hasContents()) { processPage(page); } } }