Example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDPage getAnnotations.

Prototype

public List<PDAnnotation> getAnnotations() throws IOException

Source Link

Document

This will return a list of the annotations for this page.

Usage

From source file:com.vns.pdf.impl.PdfDocument.java

License:Apache License

/**
 * Builds an {@link Annotation} for every link annotation on the page that resolves to a target.
 *
 * <p>The target is taken from the link's action first and, if that yields nothing, from the link's
 * destination. Links for which neither yields a target are skipped.
 *
 * @param pdPage the page whose annotations are inspected
 * @return the converted link annotations, possibly empty
 * @throws IOException if the page annotations cannot be read
 */
private List<Annotation> parseAnnotation(PDPage pdPage) throws IOException {
    List<Annotation> result = new ArrayList<>();
    for (PDAnnotation candidate : pdPage.getAnnotations()) {
        if (!(candidate instanceof PDAnnotationLink)) {
            continue;
        }
        PDAnnotationLink link = (PDAnnotationLink) candidate;
        PDRectangle bounds = link.getRectangle();
        float left = bounds.getLowerLeftX();
        float top = bounds.getUpperRightY();
        float boxWidth = bounds.getWidth();
        float boxHeight = bounds.getHeight();

        // Only unrotated pages get their y coordinate re-expressed relative to the media box height;
        // for every other rotation the rectangle's own upper-right y is used unchanged.
        if (pdPage.getRotation() == 0) {
            top = pdPage.getMediaBox().getHeight() - top;
        }

        ActionData target = parsePDAction(link.getAction());
        if (target == null) {
            target = parsePDDestination(link.getDestination());
        }
        if (target == null) {
            continue;
        }
        result.add(new Annotation(left, top, boxWidth, boxHeight, target.destX, target.destY,
                target.destPage, target.destZoom));
    }
    return result;
}

From source file:com.zilbo.flamingSailor.TE.PDFParser.java

License:Apache License

/**
 * Finishes a page: converts every {@code TextPosition} collected for the page into a
 * {@code TextPiece}, tallies per font name how many pieces share each rounded height, hands both to
 * {@code currentPage}, stores the page text accumulated in {@code outString}, records link
 * annotations that point at an XYZ destination, and finally appends {@code currentPage} to
 * {@code textPageList}.
 *
 * @param page the page that has just been processed
 * @throws IOException if the superclass call or reading the page annotations fails
 */
@Override
protected void endPage(PDPage page) throws IOException {
    super.endPage(page);
    int pieceID = 0;
    // font name -> (rounded piece height -> number of pieces with that height)
    Map<String, Map<Integer, Long>> fontCounts = new HashMap<>();
    List<TextPiece> wordsOfThisPage = new ArrayList<>();

    for (List<TextPosition> aCharactersByArticle : charactersByArticle) {
        //   int len = aCharactersByArticle.size();
        for (TextPosition t : aCharactersByArticle) {
            // copy information
            TextPiece w = new TextPiece(pieceID++);
            PDFont font = t.getFont();
            PDFontDescriptor fontDescriptor = font.getFontDescriptor();

            //   w.setFontDescriptor(fontDescriptor);
            // Fonts without a descriptor are all grouped under the name "UNKNOWN".
            if (fontDescriptor == null) {
                w.setFontName("UNKNOWN");
            } else {
                w.setFontName(fontDescriptor.getFontName());
            }

            /*
            * 100: a simple step to fix the font size to the normal range, for those documents in unknown codes that PDFBox can not process now
            */
            if (t.getFontSize() < 0.3 && t.getYScale() <= 1.0) {
                // Tiny font size: scale font size and height by 100, keep the x/y scales as reported.
                w.setFontSize(t.getFontSize() * 100);
                w.setHeight(Math.max(t.getYScale(), t.getFontSize()) * 100);
                w.setXScale(t.getXScale());
                w.setYScale(t.getYScale());
            } else {
                if (t.getYScale() < 0.3 && t.getFontSize() <= 1.0) {
                    // Tiny y scale: scale both scales by 100 and derive the height from the scaled y scale.
                    // NOTE(review): this branch never calls setFontSize, unlike the other two — confirm
                    // whether leaving the TextPiece's font size at its initial value is intended.
                    w.setYScale(t.getYScale() * 100);
                    w.setXScale(t.getXScale() * 100);
                    w.setHeight(Math.max(t.getYScale() * 100, t.getFontSize()));
                } else {
                    // Values already in a normal range: copy them unchanged.
                    w.setFontSize(t.getFontSize());
                    w.setHeight(Math.max(t.getYScale(), t.getFontSize()));
                    w.setXScale(t.getXScale());
                    w.setYScale(t.getYScale());
                }
            }

            // Increment the counter for this piece's (font name, rounded height) pair.
            Map<Integer, Long> counts = fontCounts.get(w.getFontName());
            if (counts == null) {
                counts = new HashMap<>();
                fontCounts.put(w.getFontName(), counts);
            }
            Long count = counts.get((int) Math.round(w.getHeight()));
            if (count == null) {
                count = 1L;
            } else {
                count += 1L;
            }
            counts.put((int) Math.round(w.getHeight()), count);

            // The width is stored as an absolute value, so a negative reported width becomes positive.
            w.setWidth(Math.abs(t.getWidth()));
            w.setGeom(t.getX(), t.getY(), w.getWidth(), w.getHeight());

            w.setText(t.getCharacter());

            w.setWidthOfSpace(t.getWidthOfSpace());
            wordsOfThisPage.add(w);
        }
    }
    currentPage.processPage(wordsOfThisPage, fontCounts);
    currentPage.setText(outString.getBuffer().toString());
    // Empty the shared buffer so the next page starts with no text.
    outString.getBuffer().setLength(0);
    List<PDAnnotation> annotations = page.getAnnotations();

    // Record links whose destination is an XYZ destination and whose annotation has a rectangle.
    for (PDAnnotation annotation : annotations) {
        if (annotation instanceof PDAnnotationLink) {
            PDAnnotationLink l = (PDAnnotationLink) annotation;
            PDRectangle rect = l.getRectangle();
            PDDestination dest = l.getDestination();
            if (dest instanceof PDPageXYZDestination) {
                PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest;
                PDPage pageDest = ((PDPageXYZDestination) dest).getPage();

                if (rect != null) {
                    // Only destinations reporting a negative page number are handled; presumably that
                    // means the destination references a page object instead of a page index —
                    // TODO confirm against the PDFBox documentation.
                    if (xyzDestination.getPageNumber() < 0) {
                        // Position of the target page in allpages, plus one.
                        int pageNumber = allpages.indexOf(pageDest) + 1;
                        Rectangle2D hotbox = new Rectangle2D.Double(rect.getLowerLeftX(), rect.getLowerLeftY(),
                                (rect.getUpperRightX() - rect.getLowerLeftX()),
                                (rect.getUpperRightY() - rect.getLowerLeftY()));
                        Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop());
                        currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint));
                    }
                }
            }
        }
    }

    /*
     The following code is REALLY raw.
     initial testing seemed to show memory leaks, and was REALLY slow.

    PDResources r = page.getResources();
    Map<String, PDXObjectImage> images = r.getImages();
    for (Map.Entry<String, PDXObjectImage> e : images.entrySet()) {
    BufferedImage bi = null;
    try {

        //   currentPage.addImage(bi);

        //    (e.getValue()).write2file("/tmp/II" + e.getKey());
        if (e.getValue() instanceof PDJpeg) {
            PDJpeg jpg = (PDJpeg) e.getValue();
            bi = jpg.getRGBImage();
            ColorSpace cs = bi.getColorModel().getColorSpace();
            File jpgFile = new File("/tmp/II" + e.getKey() + ".jpg");

            if (cs instanceof ColorSpaceCMYK) {

                logger.info("Ignoring image with CMYK color space");
            } else {
               // ImageIO.write(bi, "jpg", jpgFile);
                jpg.write2file("/tmp/II"+ e.getKey());
            }

        } else {
            (e.getValue()).write2file("/tmp/II" + e.getKey());
        }
    } catch (Exception ee) {
        logger.info("can't read image ;-(", ee);
    }

    }
    */

    // The page is complete: keep it and clear the reference.
    textPageList.add(currentPage);
    currentPage = null;
}

From source file:de.berber.kindle.annotator.lib.Annotation.java

License:Apache License

/**
 * Adds this annotation to {@code page} if it belongs to the page numbered
 * {@code currentPageNumber}; otherwise does nothing.
 *
 * <p>If the page's annotations cannot be read, the failure is logged together with its cause and
 * the annotation is not added.
 *
 * @param currentPageNumber
 *            The page number of {@code page}.
 * @param documentOutline
 *            The pdf outline
 * @param page
 *            The pdf page.
 */
@SuppressWarnings("unchecked")
public void toPDAnnotation(final @Nonnegative int currentPageNumber,
        final @Nonnull PDDocumentOutline documentOutline, final @Nonnull PDPage page) {
    if (this.page != currentPageNumber) {
        return;
    }

    try {
        final List<PDAnnotation> annotations = ((List<PDAnnotation>) page.getAnnotations());
        final PDAnnotation annotation = toPDAnnotation(documentOutline, page);

        if (annotation != null) {
            annotations.add(annotation);
        }
    } catch (IOException e) {
        // Pass the exception to the logger so the cause and stack trace are not lost.
        LOG.error("Cannot read annotations from PDF.", e);
    }
}

From source file:dev.ztgnrw.ExtractEmbeddedFiles.java

License:Apache License

/**
 * This is the main method./*ww w .j a  v a  2  s  .  co m*/
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void extractEmbeddedFiles(String file) throws IOException {

    PDDocument document = null;
    try {
        File pdfFile = new File(file);
        String filePath = pdfFile.getParent() + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree != null) {
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                extractFiles(names, filePath);
            } else {
                List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                    names = node.getNames();
                    extractFiles(names, filePath);
                }
            }
        }

        // extract files from annotations
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                    PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment
                            .getFile();
                    PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                    extractFile(filePath, fileSpec.getFilename(), embeddedFile);
                }
            }
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }

}

From source file:fi.riista.feature.permit.invoice.pdf.PermitHarvestInvoicePdfBuilder.java

/**
 * Writes the invoice values from {@code model} into the template's form fields, attaches every
 * annotation to its page, registers the template's font resources and flattens the form.
 *
 * @throws IOException if reading the page annotations or flattening the form fails
 */
private void addFormFieldData() throws IOException {
    // Account details
    textField("iban", model.getInvoiceAccountDetails().getCombinedBankNameAndIbanString());
    textField("bic", model.getInvoiceAccountDetails().getBic().toString());

    // Recipient and payer; the payer's address lines are joined with newlines
    textField("saaja", model.getPaymentRecipient());
    final String payerLines = Joiner.on('\n').join(model.getInvoiceRecipient().formatAsLines());
    textField("maksaja", payerLines);

    // Amount, reference and free text
    textField("summa", model.getAmountText());
    textField("viitenumero", model.getInvoiceReferenceForHuman());
    textField("lisatiedot", model.getInvoiceAdditionalInfo());

    this.acroForm.setNeedAppearances(false);

    // Make every annotation point back at the page that lists it
    for (PDPage currentPage : this.pdfDocument.getPages()) {
        for (PDAnnotation pageAnnotation : currentPage.getAnnotations()) {
            pageAnnotation.setPage(currentPage);
        }
    }

    // Register the font resource names used in the PDF template
    final PDResources defaultResources = new PDResources();
    defaultResources.put(COSName.getPDFName("Helv"), PDType1Font.HELVETICA);
    defaultResources.put(COSName.getPDFName("HeBo"), PDType1Font.HELVETICA_BOLD);
    this.acroForm.setDefaultResources(defaultResources);

    // Replace the form fields with plain page content
    this.acroForm.flatten();
}

From source file:helper.pdfpreprocessing.pdf.TextHighlight.java

License:Apache License

/**
 * Marks one text match on a page.
 *
 * <p>For every bounding box of the match, depending on the arguments:
 * <ul>
 * <li>with an empty {@code comment}, a filled rectangle of the given {@code height} is drawn;</li>
 * <li>with {@code withId}, the match text is written at the box's upper-right corner;</li>
 * <li>with a non-empty {@code comment} and {@code commentOnly == false}, a highlight annotation
 * carrying the comment is added to the page;</li>
 * <li>with a non-empty {@code comment} and {@code commentOnly == true}, existing annotations whose
 * contents carry this match's id in parentheses are extended with the comment.</li>
 * </ul>
 *
 * @param color         fill color, also used as the annotation color
 * @param contentStream stream used for the rectangle and the id text
 * @param markingMatch  the match to mark
 * @param height        height of the filled rectangle
 * @param withId        whether to print the match text next to each box
 * @param page          page that receives or holds the annotations
 * @param comment       comment text; empty means "draw only"
 * @param commentOnly   whether to amend existing annotations instead of adding new ones
 * @return {@code true} if the match had at least one bounding box, {@code false} otherwise
 * @throws IOException if drawing or reading the page annotations fails
 */
private boolean markupMatch(Color color, PDPageContentStream contentStream, Match markingMatch, int height,
        boolean withId, PDPage page, String comment, boolean commentOnly) throws IOException {
    final List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(markingMatch.positions);

    if (textBoundingBoxes.isEmpty()) {
        return false;
    }

    contentStream.setNonStrokingColor(color);
    for (PDRectangle textBoundingBox : textBoundingBoxes) {
        // Boxes narrower than 10 units are widened to 10 so the mark stays visible.
        final float markWidth = Math
                .max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10);

        if (comment.isEmpty()) {
            contentStream.addRect(textBoundingBox.getLowerLeftX(), textBoundingBox.getLowerLeftY(), markWidth,
                    height);
            contentStream.fill();
        }
        if (withId) {
            PDFont font = PDType1Font.HELVETICA;
            contentStream.beginText();
            contentStream.setFont(font, 5);
            contentStream.newLineAtOffset(textBoundingBox.getUpperRightX(),
                    textBoundingBox.getUpperRightY());
            contentStream.showText(markingMatch.str);
            contentStream.endText();
        }
        if (!comment.isEmpty() && !commentOnly) {
            PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(
                    PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
            PDRectangle position = new PDRectangle();
            position.setLowerLeftX(textBoundingBox.getLowerLeftX());
            position.setLowerLeftY(textBoundingBox.getLowerLeftY());
            position.setUpperRightX(textBoundingBox.getLowerLeftX() + markWidth);
            position.setUpperRightY(textBoundingBox.getLowerLeftY() + 10);
            txtMark.setRectangle(position);

            // Quad points: the annotation rectangle shifted down by 2 units.
            float[] quads = new float[8];
            quads[0] = position.getLowerLeftX(); // x1
            quads[1] = position.getUpperRightY() - 2; // y1
            quads[2] = position.getUpperRightX(); // x2
            quads[3] = quads[1]; // y2
            quads[4] = quads[0]; // x3
            quads[5] = position.getLowerLeftY() - 2; // y3
            quads[6] = quads[2]; // x4
            quads[7] = quads[5]; // y4
            txtMark.setQuadPoints(quads);
            txtMark.setConstantOpacity((float) 0.5);
            txtMark.setContents("Missing Assumption/s (" + markingMatch.str + "):\n" + comment);
            float[] colorArray = new float[] { 0, 0, 0 };
            colorArray = color.getColorComponents(colorArray);
            PDColor hColor = new PDColor(colorArray, PDDeviceRGB.INSTANCE);
            txtMark.setColor(hColor);
            txtMark.setCreationDate(Calendar.getInstance());
            txtMark.setTitlePopup("Assumption Error");
            page.getAnnotations().add(txtMark);
        } else if (!comment.isEmpty()) {
            // commentOnly is necessarily true here: amend annotations that carry this match's id.
            for (int i = 0; i < page.getAnnotations().size(); i++) {
                String extractedComment = page.getAnnotations().get(i).getContents();
                if (extractedComment == null) {
                    continue;
                }
                // The id is the text between the first "(" and the next ")". Annotations without
                // such a pair are skipped instead of failing with StringIndexOutOfBoundsException.
                int idStart = extractedComment.indexOf("(");
                int idEnd = idStart < 0 ? -1 : extractedComment.indexOf(")", idStart + 1);
                if (idEnd < 0) {
                    continue;
                }
                String commentID = extractedComment.substring(idStart + 1, idEnd);
                // NOTE(review): the comment is appended only when the contents already contain it —
                // confirm whether "does not contain" was intended.
                if (markingMatch.str.equals(commentID) && extractedComment.contains(comment)) {
                    page.getAnnotations().get(i).setContents(extractedComment + "\n" + comment);
                }
            }
        }
    }
    return true;
}

From source file:hightlighting.PDFTextAnnotator.java

License:Apache License

/**
 * Highlights a pattern within the PDF with the default color 
 * Returns the list of added annotations for further modification
 * Note: it will process every page, but cannot process patterns that span multiple pages 
 * Note: it will not work for top-bottom text (such as Chinese)
 * /*from  ww w  . j  a  v  a 2s  .c  o m*/
 * @param pdf
 *          PDDocument
 * @param pattern
 *          Pattern (regex)
 * @throws Exception
 */
public List<PDAnnotationTextMarkup> highlight(PDDocument pdf, Pattern pattern) throws Exception {
    if (textCache == null) {
        throw new Exception("TextCache was not initilized, please run initialize on the document first");
    }

    List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();

    ArrayList<PDAnnotationTextMarkup> highligts = new ArrayList<PDAnnotationTextMarkup>();

    for (int pageIndex = getStartPage() - 1; pageIndex < getEndPage()
            && pageIndex < pages.size(); pageIndex++) {
        PDPage page = pages.get(pageIndex);
        List<PDAnnotation> annotations = page.getAnnotations();

        List<Match> matches = this.textCache.getTextPositions(pageIndex + 1, pattern);

        for (Match match : matches) {
            List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(match.positions);

            if (textBoundingBoxes.size() > 0) {

                float[] quads = new float[8];
                int cursor = 0;
                for (PDRectangle rect : textBoundingBoxes) {
                    PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(
                            PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                    markup.setRectangle(rect);
                    float[] tmp = computeQuads(rect);
                    for (int i = 0; i < tmp.length; i++) {
                        quads[cursor + i] = tmp[i];
                    }
                    //cursor = cursor + 8;
                    markup.setQuadPoints(quads);

                    markup.setConstantOpacity((float) 0.8);
                    markup.setColour(getDefaultColor());
                    markup.setPrinted(true);
                    markup.setContents(match.str);

                    annotations.add(markup);
                    highligts.add(markup);
                }
            }
        }
    }
    return highligts;
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Closes the page's paragraph and enclosing {@code div}. When annotation text extraction is
 * enabled, first emits one {@code div class="annotation"} per URI link and per markup annotation
 * that has a title, subject or contents.
 *
 * @param page the page that has just been processed
 * @throws IOException if the annotations cannot be read or the SAX handler fails
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        writeParagraphEnd();
        // TODO: remove once PDFBOX-1143 is fixed:
        if (config.getExtractAnnotationText()) {
            for (Object candidate : page.getAnnotations()) {
                if (candidate instanceof PDAnnotationLink) {
                    // instanceof is false for null, so a link without an action is skipped here.
                    PDAction linkAction = ((PDAnnotationLink) candidate).getAction();
                    if (linkAction instanceof PDActionURI) {
                        String href = ((PDActionURI) linkAction).getURI();
                        if (href != null) {
                            handler.startElement("div", "class", "annotation");
                            handler.startElement("a", "href", href);
                            handler.endElement("a");
                            handler.endElement("div");
                        }
                    }
                }

                if (candidate instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup markup = (PDAnnotationMarkup) candidate;
                    // CSS class paired with its text, in output order.
                    // TODO: maybe also markup.getRichContents()?
                    String[][] sections = { { "annotationTitle", markup.getTitlePopup() },
                            { "annotationSubject", markup.getSubject() },
                            { "annotationContents", markup.getContents() } };

                    boolean anyText = false;
                    for (String[] section : sections) {
                        if (section[1] != null) {
                            anyText = true;
                        }
                    }

                    if (anyText) {
                        handler.startElement("div", "class", "annotation");
                        for (String[] section : sections) {
                            if (section[1] == null) {
                                continue;
                            }
                            handler.startElement("div", "class", section[0]);
                            handler.characters(section[1]);
                            handler.endElement("div");
                        }
                        handler.endElement("div");
                    }
                }
            }
        }
        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Reads {@code <job>.pdf}, records its images and annotations in {@code <job>-dict.txt}, adds
 * square annotations around images, outlines adjusted highlight rectangles on each page, and
 * saves the result as {@code <job>-new.pdf}.
 *
 * @param job base name (without extension) of the input PDF; also the prefix of both output files
 * @param pA  eight adjustment values: indexes 0-3 are passed to {@code adjustedRect} when building
 *            the text extraction regions, indexes 4-7 when building the rectangles that are
 *            recorded and drawn
 * @throws IOException if the PDF cannot be read or written, or the dictionary file cannot be created
 */
public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    ImageLocationListener imageLocationsListener = new ImageLocationListener();
    AnnotationMaker annotMaker = new AnnotationMaker();

    // Opened last, directly before the try block whose finally clause closes it.
    PrintWriter writer = new PrintWriter(dic_file);

    try {
        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // Register one numbered text region per text-markup or link annotation.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    // NOTE(review): getColor() is dereferenced without a null check — confirm every
                    // annotation handled here carries a color.
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // A region was registered for this link above. Its text is not used, but its
                    // number must be consumed so the following markups read their own regions.
                    j++;

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    // Attach the note to every image whose rectangle contains the note's lower-left corner.
                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                }
            }

            // Outline every adjusted highlight rectangle; the stream is closed even if drawing fails.
            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                canvas.close();
            }
        }
        document.save(new_job);

    } finally {
        // Close the dictionary file on every path, including failures, so it is flushed and not leaked.
        writer.close();
        if (document != null) {
            document.close();
        }

    }

}

From source file:net.padaf.preflight.helpers.PagesValidationHelper.java

License:Apache License

/**
 * /*from   w ww .j ava 2 s.  c o m*/
 * @param page
 * @param handler
 * @param result
 * @return
 * @throws ValidationException
 */
protected boolean validateAnnotation(PDPage page, DocumentHandler handler, List<ValidationError> result)
        throws ValidationException {
    try {
        List<?> lAnnots = page.getAnnotations();
        for (Object object : lAnnots) {
            if (object instanceof PDAnnotation) {

                COSDictionary cosAnnot = ((PDAnnotation) object).getDictionary();
                AnnotationValidator validator = this.annotFact.getAnnotationValidator(cosAnnot, handler,
                        result);
                if (validator != null) {
                    return validator.validate(result);
                }

            }
        }

    } catch (IOException e) {
        throw new ValidationException("Unable to access Annotation", e);
    }
    // --- No annotations, validation OK
    return true;
}