Example usage for org.apache.pdfbox.pdmodel PDDocument getPages

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getPages

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument getPages.

Prototype

public PDPageTree getPages() 

Source Link

Document

Returns the page tree.

Usage

From source file:dev.ztgnrw.ExtractEmbeddedFiles.java

License:Apache License

/**
 * This is the main method./*from w w w .  j  a v  a2  s.c om*/
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void extractEmbeddedFiles(String file) throws IOException {

    PDDocument document = null;
    try {
        File pdfFile = new File(file);
        String filePath = pdfFile.getParent() + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree != null) {
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                extractFiles(names, filePath);
            } else {
                List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                    names = node.getNames();
                    extractFiles(names, filePath);
                }
            }
        }

        // extract files from annotations
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                    PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment
                            .getFile();
                    PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                    extractFile(filePath, fileSpec.getFilename(), embeddedFile);
                }
            }
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }

}

From source file:eu.europa.esig.dss.pades.signature.PAdESVisibleSignaturePositionTest.java

License:Open Source License

/**
 * Asserts that the first page of the PDF read from the stream has the expected rotation.
 *
 * @param inputStream stream holding the PDF document to inspect
 * @param rotate      expected value of the first page's rotation
 * @throws IOException if the document cannot be loaded or closed
 */
private void checkRotation(InputStream inputStream, int rotate) throws IOException {
    // Close the document even when the assertion fails so its resources are released.
    try (PDDocument document = PDDocument.load(inputStream)) {
        Assert.assertEquals(rotate, document.getPages().get(0).getRotation());
    }
}

From source file:eu.europa.esig.dss.pades.signature.PAdESVisibleSignaturePositionTest.java

License:Open Source License

/**
 * Signs a sample PDF and compares its rendering, page by page, with a reference PDF
 * loaded from the {@code /visualSignature/check/} classpath folder.
 *
 * @param samplePdf  key of the PDF to sign in {@code signablePdfs}
 * @param checkPdf   file name of the reference PDF
 * @param similarity minimum required similarity; truncated to two decimals before comparing
 * @throws IOException if a document cannot be loaded, rendered or closed
 */
private void checkImageSimilarityPdf(String samplePdf, String checkPdf, float similarity) throws IOException {
    DSSDocument document = sign(signablePdfs.get(samplePdf));

    // Streams and documents are closed in reverse order, even when an assertion fails.
    try (InputStream sampleStream = document.openStream();
            InputStream checkStream = getClass().getResourceAsStream("/visualSignature/check/" + checkPdf);
            PDDocument sampleDocument = PDDocument.load(sampleStream);
            PDDocument checkDocument = PDDocument.load(checkStream)) {

        PDPageTree samplePageTree = sampleDocument.getPages();
        PDPageTree checkPageTree = checkDocument.getPages();

        Assert.assertEquals(checkPageTree.getCount(), samplePageTree.getCount());

        PDFRenderer sampleRenderer = new PDFRenderer(sampleDocument);
        PDFRenderer checkRenderer = new PDFRenderer(checkDocument);

        // A calculated rotated position has about 1 pixel of position difference, so the
        // threshold is truncated to two decimals. It does not depend on the page, hence
        // it is computed once.
        float calculatedSimilarity = ((int) (similarity * 100f)) / 100f;

        for (int pageNumber = 0; pageNumber < checkPageTree.getCount(); pageNumber++) {
            BufferedImage sampleImage = sampleRenderer.renderImageWithDPI(pageNumber, DPI);
            BufferedImage checkImage = checkRenderer.renderImageWithDPI(pageNumber, DPI);

            float checkSimilarity = checkImageSimilarity(sampleImage, checkImage, CHECK_RESOLUTION);
            Assert.assertTrue(checkSimilarity >= calculatedSimilarity);
        }
    }
}

From source file:helper.pdfpreprocessing.pdf.TextHighlight.java

License:Apache License

/**
 * Resets the engine, binds it to the given document and processes all of its pages.
 *
 * <p>Any {@link Exception} or {@link Error} raised while doing so is printed to the
 * standard error stream and not rethrown.
 *
 * @param pdf the document to process
 * @throws IOException declared by the signature; failures inside the body are caught and printed
 */
public void initialize(final PDDocument pdf) throws IOException {
    try {
        resetEngine();
        document = pdf;
        textCache = new TextCache();

        final boolean extraFormatting = getAddMoreFormatting();
        if (extraFormatting) {
            // Use the line separator as paragraph, page and article delimiter.
            setParagraphEnd(getLineSeparator());
            setPageStart(getLineSeparator());
            setArticleStart(getLineSeparator());
            setArticleEnd(getLineSeparator());
        }

        startDocument(pdf);
        processPages(pdf.getPages());
        endDocument(pdf);
    } catch (Exception | Error failure) {
        // NOTE(review): failures are only printed, so callers cannot detect them.
        failure.printStackTrace();
    }
}

From source file:model.util.pdf.PDFUtils.java

License:Apache License

/**
 * Assigns the same crop box coordinates to every page of the document.
 *
 * @param document the document whose pages are updated
 * @param a        lower-left x coordinate of the crop box
 * @param b        lower-left y coordinate of the crop box
 * @param c        upper-right x coordinate of the crop box
 * @param d        upper-right y coordinate of the crop box
 */
private static void changeCropBox(PDDocument document, float a, float b, float c, float d) {
    for (PDPage currentPage : document.getPages()) {
        // A fresh rectangle per page, so pages never share one rectangle instance.
        PDRectangle cropBox = new PDRectangle();
        cropBox.setUpperRightX(c);
        cropBox.setUpperRightY(d);
        cropBox.setLowerLeftX(a);
        cropBox.setLowerLeftY(b);
        currentPage.setCropBox(cropBox);
    }
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Processes the annotations and images of {@code <job>.pdf}.
 *
 * <p>For every page the method records image locations and text-markup / text annotations
 * into {@code <job>-dict.txt}, adds square annotations around images, draws a box for each
 * text-markup annotation, and finally saves the result as {@code <job>-new.pdf}.
 *
 * @param job base path of the job, without the {@code .pdf} extension
 * @param pA  rectangle adjustment values; indexes 0-3 are used for the text extraction
 *            region and indexes 4-7 for the recorded/drawn rectangle, so pages that contain
 *            text-markup or link annotations require at least eight non-null entries
 * @throws IOException if the document cannot be read, modified or written
 */
public void doJob(String job, Float[] pA) throws IOException {

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    Stamper s = new Stamper(); // utility class
    ImageLocationListener imageLocationsListener = new ImageLocationListener();
    AnnotationMaker annotMaker = new AnnotationMaker();

    PDDocument document = null;
    // Opened right before the try block so it is always closed by the finally clause.
    PrintWriter writer = new PrintWriter(dic_file);

    try {
        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // Register one numbered extraction region per text-markup or link annotation.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    // NOTE(review): getColor() is dereferenced unchecked - confirm it cannot be null here.
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // Links were given a region number above as well; skip it so the
                    // numbering stays aligned for the text-markup annotations that follow.
                    j++;

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    // A text annotation whose lower-left corner lies inside an image is
                    // recorded against that image and highlighted with a green square.
                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                }
            }

            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                // Close the stream even if drawing fails, otherwise it would leak.
                canvas.close();
            }
        }
        writer.close();
        document.save(new_job);

    } finally {
        // Closing an already closed PrintWriter is a no-op; this covers the failure paths.
        writer.close();
        if (document != null) {
            document.close();
        }

    }

}

From source file:org.example.extractimagesfrompdfpages.ExtractImagesFromPDFPagesMain.java

/**
 * Writes every image XObject found in the page resources of a PDF as a PNG file.
 *
 * <p>Images of page {@code i} (1-based) are stored as
 * {@code <pdf directory>/temp_images/<i>/<j>.png}, where {@code j} is the 1-based index of the
 * image within that page. The folder of a page is only created when the page has an image.
 *
 * @param args command line arguments; {@code args[0]} is the path of the PDF file
 * @throws IllegalArgumentException if no PDF path is given
 * @throws RuntimeException         wrapping any {@link IOException} raised while processing
 */
public static void main(String[] args) {
    if (args == null || args.length == 0) {
        throw new IllegalArgumentException("Usage: ExtractImagesFromPDFPagesMain <pdf-file>");
    }
    try {
        File thePDFFile = new File(args[0]);
        PDDocument document = PDDocument.load(thePDFFile);
        try {
            // Resolve against the working directory first: getParentFile() on a bare
            // file name would be null.
            File thePDFFileDirectory = thePDFFile.getAbsoluteFile().getParentFile();
            PDPageTree list = document.getPages();
            int i = 0;
            for (PDPage page : list) {
                i++;
                PDResources pdResources = page.getResources();
                if (pdResources == null) {
                    // Page without resources: nothing to extract.
                    continue;
                }
                boolean alreadyCreatedFolderForThisPage = false;
                File thePDFPageFolder = new File(thePDFFileDirectory.getAbsolutePath() + "/temp_images" + "/" + i);
                int j = 1;
                for (COSName c : pdResources.getXObjectNames()) {
                    PDXObject o = pdResources.getXObject(c);
                    if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
                        if (!alreadyCreatedFolderForThisPage) {
                            thePDFPageFolder.mkdirs();
                            alreadyCreatedFolderForThisPage = true;
                        }

                        File file = new File(thePDFPageFolder.getAbsolutePath() + "/" + j + ".png");
                        ImageIO.write(((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) o).getImage(),
                                "png", file);
                        System.out.println(thePDFPageFolder.getAbsolutePath() + "/" + j + ".png");

                        j++;
                    }
                }
            }
        } finally {
            // Release the document whether or not the extraction succeeded.
            document.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(ExtractImagesFromPDFPagesMain.class.getName()).log(Level.SEVERE, null, ex);
        throw new RuntimeException(ex);
    }

}

From source file:org.pdfmetamodifier.IOHelper.java

License:Apache License

/**
 * Save all Attached (embedded) files to some directory.
 * /*from www.ja v  a  2 s  . c om*/
 * @param pdfFile
 *            Source PDF file.
 * @param outputDir
 *            Target directory.
 * @throws IOException
 */
/*
 * See:
 *      https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java?view=markup
 */
public static void saveAttachments(final File pdfFile, final File outputDir) throws IOException {
    PDDocument document = null;
    try {
        // Read PDF file.
        document = PDDocument.load(pdfFile);
        if (document.isEncrypted()) {
            throw new IOException("Document is encrypted.");
        }

        // Extract Embedded (attached) files.
        final PDDocumentNameDictionary documentNameDictionary = new PDDocumentNameDictionary(
                document.getDocumentCatalog());
        final PDEmbeddedFilesNameTreeNode embeddedFilesNameTree = documentNameDictionary.getEmbeddedFiles();
        if (embeddedFilesNameTree != null) {
            extractFiles(outputDir, embeddedFilesNameTree.getNames());

            final List<PDNameTreeNode<PDComplexFileSpecification>> kids = embeddedFilesNameTree.getKids();
            if (kids != null) {
                for (PDNameTreeNode<PDComplexFileSpecification> nameTreeNode : kids) {
                    extractFiles(outputDir, nameTreeNode.getNames());
                }
            }
        }

        // Extract Embedded (attached) from annotations.
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    final PDAnnotationFileAttachment fileAttach = (PDAnnotationFileAttachment) annotation;

                    final PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fileAttach
                            .getFile();
                    extractFile(outputDir, fileSpec);
                }
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:org.pdfsam.pdfbox.PDFBoxOutlineLevelsHandler.java

License:Open Source License

/**
 * Creates a handler bound to the given document.
 *
 * <p>The page tree is cached, the named destinations are read when the document catalog
 * has a names dictionary, and the title pattern is compiled when a non-blank regular
 * expression is supplied.
 *
 * @param document           the document to read from; must not be null
 * @param matchingTitleRegEx optional regular expression; ignored when blank
 */
public PDFBoxOutlineLevelsHandler(PDDocument document, String matchingTitleRegEx) {
    requireNonNull(document, "Unable to retrieve bookmarks from a null document.");
    this.document = document;
    this.pages = document.getPages();

    // The catalog has no names dictionary when the document defines no named entries.
    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary != null) {
        this.namedDestinations = nameDictionary.getDests();
    }

    if (isNotBlank(matchingTitleRegEx)) {
        this.titleMatchingPattern = Pattern.compile(matchingTitleRegEx);
    }
}

From source file:src.controller.DocumentController.java

/**
 * Convertion en image/* ww  w. j  av  a2s . c  om*/
 *
 * @param file
 * @param extension
 */
public void convertToImage(File file, String extension) {
    PDDocument document;
    try {
        document = PDDocument.load(file);
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCounter = 0;

        for (PDPage lapage : document.getPages()) {
            // note that the page number parameter is zero based
            BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            ImageIOUtil.writeImage(bim, file + "-" + (pageCounter++) + extension, 300);
        }
        document.close();
    } catch (IOException ex) {
        Logger.getLogger(DocumentController.class.getName()).log(Level.SEVERE, null, ex);
    }
}