Example usage for org.apache.pdfbox.pdmodel PDDocument importPage

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel PDDocument importPage.

Prototype

public PDPage importPage(PDPage page) throws IOException

Source Link

Document

This will import and copy the contents from another location.

Usage

From source file:de.offis.health.icardea.cied.pdf.extractor.PDFApachePDFBoxExtractor.java

License:Apache License

@SuppressWarnings("unchecked")
public byte[] getPDFPages(int fromPageNumber, int toPageNumber) {
    ByteArrayOutputStream byteArrayOutputStream = null;
    boolean extractionSuccessful = false;

    if (pdfDocument != null) {
        int numberOfPages = getNumberOfPages();

        /*//from   w w  w. j  a  v  a 2s  .  c o m
         * Check if the given page numbers are in the allowed range.
         */
        if (fromPageNumber > 0 && fromPageNumber <= numberOfPages && toPageNumber > 0
                && toPageNumber <= numberOfPages) {
            /*
             * Now check if the given fromPageNumber is smaller
             * as the given toPageNumber. If not swap the numbers.
             */
            if (fromPageNumber > toPageNumber) {
                int tmpPageNumber = toPageNumber;
                toPageNumber = fromPageNumber;
                fromPageNumber = tmpPageNumber;
            }

            /*
             * Now extract the pages
             * 
             * NOTE
             * ====
             * Since Apache PDFBox v1.5.0 there exists the class
             * org.apache.pdfbox.util.PageExtractor
             */

            /*
            boolean isApachePageExtractorAvailable = false;
            Class<?> pageExtractorClass = null;
            try {
               pageExtractorClass = getClass().getClassLoader().loadClass("org.apache.pdfbox.util.PageExtractor");
               Constructor<?> pdfExtractConstructor = pageExtractorClass.getConstructor(PDDocument.class, int.class, int.class);
               Method pdfExtractMethod = pageExtractorClass.getMethod("extract");
               isApachePageExtractorAvailable = true;
            } catch (ClassNotFoundException ex) {
            } catch (SecurityException ex) {
            } catch (NoSuchMethodException ex) {
            }
            */

            try {
                PDDocument extractedDocumentPages = new PDDocument();
                extractedDocumentPages.setDocumentInformation(this.pdfDocument.getDocumentInformation());
                extractedDocumentPages.getDocumentCatalog()
                        .setViewerPreferences(this.pdfDocument.getDocumentCatalog().getViewerPreferences());

                List<PDPage> pages = (List<PDPage>) this.pdfDocument.getDocumentCatalog().getAllPages();
                int pageCounter = 1;
                for (PDPage page : pages) {
                    if (pageCounter >= fromPageNumber && pageCounter <= toPageNumber) {
                        PDPage importedPdfPage;
                        importedPdfPage = extractedDocumentPages.importPage(page);
                        importedPdfPage.setCropBox(page.findCropBox());
                        importedPdfPage.setMediaBox(page.findMediaBox());
                        importedPdfPage.setResources(page.findResources());
                        importedPdfPage.setRotation(page.findRotation());
                    }
                    pageCounter++;
                } // end for

                byteArrayOutputStream = new ByteArrayOutputStream();
                extractedDocumentPages.save(byteArrayOutputStream);
                extractedDocumentPages.close();
                extractionSuccessful = true;
            } catch (COSVisitorException ex) {
                // TODO: Create an own exception for PDF processing errors.
                logger.error("An exception occurred while extracting " + "pages from the input PDF file.", ex);
            } catch (IOException ex) {
                // TODO: Create an own exception for PDF processing errors.
                logger.error("An exception occurred while extracting " + "pages from the input PDF file.", ex);
            } finally {
                if (!extractionSuccessful) {
                    byteArrayOutputStream = null;
                }
            } // end try..catch..finally
        } // end if checking range of given pages
    } // end if (pdfDocument != null)

    if (byteArrayOutputStream != null) {
        return byteArrayOutputStream.toByteArray();
    }
    return null;
}

From source file:paper2ebook.Transformer.java

License:Apache License

/**
 * Output a PDF with as many pages as there are interesting areas in the
 * input document/*from  www .  ja v  a  2 s .  c  o m*/
 */
@Override
public PDDocument extract() throws IOException {
    PDDocument extractedDocument = new PDDocument();
    extractedDocument.setDocumentInformation(sourceDocument.getDocumentInformation());
    extractedDocument.getDocumentCatalog()
            .setViewerPreferences(sourceDocument.getDocumentCatalog().getViewerPreferences());

    @SuppressWarnings("unchecked")
    List<PDPage> pages = sourceDocument.getDocumentCatalog().getAllPages();
    int pageCounter = 1;
    for (PDPage page : pages) {
        if (pageCounter >= startPage && pageCounter <= endPage) {

            List<PDRectangle> zoomedFragments = getFragments(page);
            for (PDRectangle fragment : zoomedFragments) {
                PDPage outputPage = extractedDocument.importPage(page);
                outputPage.setCropBox(fragment);
                outputPage.setMediaBox(page.getMediaBox());
                outputPage.setResources(page.findResources());
                outputPage.setRotation(page.findRotation());

                // TODO: rotate the page in landscape mode is width > height
            }
        }
        pageCounter++;
    }
    return extractedDocument;
}