Example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations

List of usage examples for org.apache.pdfbox.pdmodel PDPage getAnnotations

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel PDPage getAnnotations.

Prototype

public List<PDAnnotation> getAnnotations() throws IOException 

Source Link

Document

This will return a list of the annotations for this page.

Usage

From source file:com.vns.pdf.impl.PdfDocument.java

License:Apache License

private List<Annotation> parseAnnotation(PDPage pdPage) throws IOException {
    List<Annotation> annotations = new ArrayList<>();
    for (PDAnnotation annt : pdPage.getAnnotations()) {
        if (annt instanceof PDAnnotationLink) {
            PDAnnotationLink link = (PDAnnotationLink) annt;
            PDRectangle rect = link.getRectangle();
            float x = rect.getLowerLeftX();
            float y = rect.getUpperRightY();
            float width = rect.getWidth();
            float height = rect.getHeight();
            int rotation = pdPage.getRotation();
            if (rotation == 0) {
                PDRectangle pageSize = pdPage.getMediaBox();
                y = pageSize.getHeight() - y;
            } else if (rotation == 90) {
                //do nothing
            }/*w  ww  .  j ava2s  .  c  om*/

            ActionData actionData = parsePDAction(link.getAction());
            if (actionData == null) {
                actionData = parsePDDestination(link.getDestination());
            }
            if (actionData != null) {
                Annotation a = new Annotation(x, y, width, height, actionData.destX, actionData.destY,
                        actionData.destPage, actionData.destZoom);
                annotations.add(a);
            }
        }
    }
    return annotations;
}

From source file:com.zilbo.flamingSailor.TE.PDFParser.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {
    super.endPage(page);
    int pieceID = 0;
    Map<String, Map<Integer, Long>> fontCounts = new HashMap<>();
    List<TextPiece> wordsOfThisPage = new ArrayList<>();

    for (List<TextPosition> aCharactersByArticle : charactersByArticle) {
        //   int len = aCharactersByArticle.size();
        for (TextPosition t : aCharactersByArticle) {
            // copy information
            TextPiece w = new TextPiece(pieceID++);
            PDFont font = t.getFont();/*  ww w.j  a va  2s  .  c  o  m*/
            PDFontDescriptor fontDescriptor = font.getFontDescriptor();

            //   w.setFontDescriptor(fontDescriptor);
            if (fontDescriptor == null) {
                w.setFontName("UNKNOWN");
            } else {
                w.setFontName(fontDescriptor.getFontName());
            }

            /*
            * 100: a simple step to fix the font size to the normal range, for those documents in unknown codes that PDFBox can not process now
            */
            if (t.getFontSize() < 0.3 && t.getYScale() <= 1.0) {
                w.setFontSize(t.getFontSize() * 100);
                w.setHeight(Math.max(t.getYScale(), t.getFontSize()) * 100);
                w.setXScale(t.getXScale());
                w.setYScale(t.getYScale());
            } else {
                if (t.getYScale() < 0.3 && t.getFontSize() <= 1.0) {
                    w.setYScale(t.getYScale() * 100);
                    w.setXScale(t.getXScale() * 100);
                    w.setHeight(Math.max(t.getYScale() * 100, t.getFontSize()));
                } else {
                    w.setFontSize(t.getFontSize());
                    w.setHeight(Math.max(t.getYScale(), t.getFontSize()));
                    w.setXScale(t.getXScale());
                    w.setYScale(t.getYScale());
                }
            }

            Map<Integer, Long> counts = fontCounts.get(w.getFontName());
            if (counts == null) {
                counts = new HashMap<>();
                fontCounts.put(w.getFontName(), counts);
            }
            Long count = counts.get((int) Math.round(w.getHeight()));
            if (count == null) {
                count = 1L;
            } else {
                count += 1L;
            }
            counts.put((int) Math.round(w.getHeight()), count);

            w.setWidth(Math.abs(t.getWidth()));
            w.setGeom(t.getX(), t.getY(), w.getWidth(), w.getHeight());

            w.setText(t.getCharacter());

            w.setWidthOfSpace(t.getWidthOfSpace());
            wordsOfThisPage.add(w);
        }
    }
    currentPage.processPage(wordsOfThisPage, fontCounts);
    currentPage.setText(outString.getBuffer().toString());
    outString.getBuffer().setLength(0);
    List<PDAnnotation> annotations = page.getAnnotations();

    for (PDAnnotation annotation : annotations) {
        if (annotation instanceof PDAnnotationLink) {
            PDAnnotationLink l = (PDAnnotationLink) annotation;
            PDRectangle rect = l.getRectangle();
            PDDestination dest = l.getDestination();
            if (dest instanceof PDPageXYZDestination) {
                PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest;
                PDPage pageDest = ((PDPageXYZDestination) dest).getPage();

                if (rect != null) {
                    if (xyzDestination.getPageNumber() < 0) {
                        int pageNumber = allpages.indexOf(pageDest) + 1;
                        Rectangle2D hotbox = new Rectangle2D.Double(rect.getLowerLeftX(), rect.getLowerLeftY(),
                                (rect.getUpperRightX() - rect.getLowerLeftX()),
                                (rect.getUpperRightY() - rect.getLowerLeftY()));
                        Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop());
                        currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint));
                    }
                }
            }
        }
    }

    /*
     The following code is REALLY raw.
     initial testing seemed to show memory leaks, and was REALLY slow.
            
    PDResources r = page.getResources();
    Map<String, PDXObjectImage> images = r.getImages();
    for (Map.Entry<String, PDXObjectImage> e : images.entrySet()) {
    BufferedImage bi = null;
    try {
            
        //   currentPage.addImage(bi);
            
        //    (e.getValue()).write2file("/tmp/II" + e.getKey());
        if (e.getValue() instanceof PDJpeg) {
            PDJpeg jpg = (PDJpeg) e.getValue();
            bi = jpg.getRGBImage();
            ColorSpace cs = bi.getColorModel().getColorSpace();
            File jpgFile = new File("/tmp/II" + e.getKey() + ".jpg");
            
            if (cs instanceof ColorSpaceCMYK) {
            
                logger.info("Ignoring image with CMYK color space");
            } else {
               // ImageIO.write(bi, "jpg", jpgFile);
                jpg.write2file("/tmp/II"+ e.getKey());
            }
            
        } else {
            (e.getValue()).write2file("/tmp/II" + e.getKey());
        }
    } catch (Exception ee) {
        logger.info("can't read image ;-(", ee);
    }
            
    }
    */

    textPageList.add(currentPage);
    currentPage = null;
}

From source file:de.berber.kindle.annotator.lib.Annotation.java

License:Apache License

/**
 * Checks whether the current belongs to the page {@code currentPageNumber}
 * and adds the annotation in such an case.
 * //from w  ww. j a  v a  2s .co m
 * @param currentPageNumber
 *            The page number of {@code page}.
 * @param documentOutline
 *            The pdf outline
 * @param page
 *            The pdf page.
 */
@SuppressWarnings("unchecked")
public void toPDAnnotation(final @Nonnegative int currentPageNumber,
        final @Nonnull PDDocumentOutline documentOutline, final @Nonnull PDPage page) {
    if (this.page != currentPageNumber) {
        return;
    }

    try {
        final List<PDAnnotation> annotations = ((List<PDAnnotation>) page.getAnnotations());
        final PDAnnotation annotation = toPDAnnotation(documentOutline, page);

        if (annotation != null) {
            annotations.add(annotation);
        }
    } catch (IOException e) {
        LOG.error("Cannot read annotations from PDF.");
    }
}

From source file:dev.ztgnrw.ExtractEmbeddedFiles.java

License:Apache License

/**
 * This is the main method./*ww w .j a  v a  2  s  .  co m*/
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void extractEmbeddedFiles(String file) throws IOException {

    PDDocument document = null;
    try {
        File pdfFile = new File(file);
        String filePath = pdfFile.getParent() + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree != null) {
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                extractFiles(names, filePath);
            } else {
                List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                    names = node.getNames();
                    extractFiles(names, filePath);
                }
            }
        }

        // extract files from annotations
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                    PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment
                            .getFile();
                    PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                    extractFile(filePath, fileSpec.getFilename(), embeddedFile);
                }
            }
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }

}

From source file:fi.riista.feature.permit.invoice.pdf.PermitHarvestInvoicePdfBuilder.java

private void addFormFieldData() throws IOException {
    textField("iban", model.getInvoiceAccountDetails().getCombinedBankNameAndIbanString());
    textField("bic", model.getInvoiceAccountDetails().getBic().toString());
    textField("saaja", model.getPaymentRecipient());
    textField("maksaja", Joiner.on('\n').join(model.getInvoiceRecipient().formatAsLines()));
    textField("summa", model.getAmountText());
    textField("viitenumero", model.getInvoiceReferenceForHuman());
    textField("lisatiedot", model.getInvoiceAdditionalInfo());

    this.acroForm.setNeedAppearances(false);

    // Fix annotations
    for (PDPage page : this.pdfDocument.getPages()) {
        for (PDAnnotation annot : page.getAnnotations()) {
            annot.setPage(page);/*w ww. j  a v a 2 s.  c om*/
        }
    }

    // Define font resources names used in PDF template
    final PDResources dr = new PDResources();
    dr.put(COSName.getPDFName("Helv"), PDType1Font.HELVETICA);
    dr.put(COSName.getPDFName("HeBo"), PDType1Font.HELVETICA_BOLD);
    this.acroForm.setDefaultResources(dr);

    // Convert form fields to text
    this.acroForm.flatten();
}

From source file:helper.pdfpreprocessing.pdf.TextHighlight.java

License:Apache License

private boolean markupMatch(Color color, PDPageContentStream contentStream, Match markingMatch, int height,
        boolean withId, PDPage page, String comment, boolean commentOnly) throws IOException {
    final List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(markingMatch.positions);

    if (textBoundingBoxes.size() > 0) {
        contentStream.setNonStrokingColor(color);
        for (PDRectangle textBoundingBox : textBoundingBoxes) {
            if (comment.isEmpty()) {
                contentStream.addRect(textBoundingBox.getLowerLeftX(), textBoundingBox.getLowerLeftY(), Math
                        .max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10),
                        height);/*from  w w  w  .jav  a  2s. c  om*/
                contentStream.fill();
            }
            if (withId) {
                PDFont font = PDType1Font.HELVETICA;
                contentStream.beginText();
                contentStream.setFont(font, 5);
                contentStream.newLineAtOffset(textBoundingBox.getUpperRightX(),
                        textBoundingBox.getUpperRightY());
                contentStream.showText(markingMatch.str);
                contentStream.endText();
            }
            if (!comment.isEmpty() && !commentOnly) {
                PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(
                        PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                PDRectangle position = new PDRectangle();
                position.setLowerLeftX(textBoundingBox.getLowerLeftX());
                position.setLowerLeftY(textBoundingBox.getLowerLeftY());
                position.setUpperRightX(textBoundingBox.getLowerLeftX() + Math
                        .max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10));
                position.setUpperRightY(textBoundingBox.getLowerLeftY() + 10);
                txtMark.setRectangle(position);

                float[] quads = new float[8];
                quads[0] = position.getLowerLeftX(); // x1
                quads[1] = position.getUpperRightY() - 2; // y1
                quads[2] = position.getUpperRightX(); // x2
                quads[3] = quads[1]; // y2
                quads[4] = quads[0]; // x3
                quads[5] = position.getLowerLeftY() - 2; // y3
                quads[6] = quads[2]; // x4
                quads[7] = quads[5]; // y5
                txtMark.setQuadPoints(quads);
                txtMark.setConstantOpacity((float) 0.5);
                txtMark.setContents("Missing Assumption/s (" + markingMatch.str + "):\n" + comment);
                float[] colorArray = new float[] { 0, 0, 0 };
                colorArray = color.getColorComponents(colorArray);
                PDColor hColor = new PDColor(colorArray, PDDeviceRGB.INSTANCE);
                txtMark.setColor(hColor);
                txtMark.setCreationDate(Calendar.getInstance());
                txtMark.setTitlePopup("Assumption Error");
                page.getAnnotations().add(txtMark);
            } else if (!comment.isEmpty() && commentOnly) {
                for (int i = 0; i < page.getAnnotations().size(); i++) {
                    String extractedComment = page.getAnnotations().get(i).getContents();
                    if (extractedComment != null) {
                        String commentID = extractedComment.substring(extractedComment.indexOf("(") + 1,
                                extractedComment.indexOf(")"));
                        if (markingMatch.str.equals(commentID) && extractedComment.contains(comment)) {
                            page.getAnnotations().get(i).setContents(extractedComment + "\n" + comment);
                        }

                    }
                }
            }
        }
        return true;
    }
    return false;
}

From source file:hightlighting.PDFTextAnnotator.java

License:Apache License

/**
 * Highlights a pattern within the PDF with the default color 
 * Returns the list of added annotations for further modification
 * Note: it will process every page, but cannot process patterns that span multiple pages 
 * Note: it will not work for top-bottom text (such as Chinese)
 * /*from  ww w  . j  a  v  a 2s  .c  o m*/
 * @param pdf
 *          PDDocument
 * @param pattern
 *          Pattern (regex)
 * @throws Exception
 */
public List<PDAnnotationTextMarkup> highlight(PDDocument pdf, Pattern pattern) throws Exception {
    if (textCache == null) {
        throw new Exception("TextCache was not initilized, please run initialize on the document first");
    }

    List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();

    ArrayList<PDAnnotationTextMarkup> highligts = new ArrayList<PDAnnotationTextMarkup>();

    for (int pageIndex = getStartPage() - 1; pageIndex < getEndPage()
            && pageIndex < pages.size(); pageIndex++) {
        PDPage page = pages.get(pageIndex);
        List<PDAnnotation> annotations = page.getAnnotations();

        List<Match> matches = this.textCache.getTextPositions(pageIndex + 1, pattern);

        for (Match match : matches) {
            List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(match.positions);

            if (textBoundingBoxes.size() > 0) {

                float[] quads = new float[8];
                int cursor = 0;
                for (PDRectangle rect : textBoundingBoxes) {
                    PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(
                            PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                    markup.setRectangle(rect);
                    float[] tmp = computeQuads(rect);
                    for (int i = 0; i < tmp.length; i++) {
                        quads[cursor + i] = tmp[i];
                    }
                    //cursor = cursor + 8;
                    markup.setQuadPoints(quads);

                    markup.setConstantOpacity((float) 0.8);
                    markup.setColour(getDefaultColor());
                    markup.setPrinted(true);
                    markup.setContents(match.str);

                    annotations.add(markup);
                    highligts.add(markup);
                }
            }
        }
    }
    return highligts;
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {
    try {// www . ja  va 2  s . c  om
        writeParagraphEnd();
        // TODO: remove once PDFBOX-1143 is fixed:
        if (config.getExtractAnnotationText()) {
            for (Object o : page.getAnnotations()) {
                if (o instanceof PDAnnotationLink) {
                    PDAnnotationLink annotationlink = (PDAnnotationLink) o;
                    if (annotationlink.getAction() != null) {
                        PDAction action = annotationlink.getAction();
                        if (action instanceof PDActionURI) {
                            PDActionURI uri = (PDActionURI) action;
                            String link = uri.getURI();
                            if (link != null) {
                                handler.startElement("div", "class", "annotation");
                                handler.startElement("a", "href", link);
                                handler.endElement("a");
                                handler.endElement("div");
                            }
                        }
                    }
                }

                if (o instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annot = (PDAnnotationMarkup) o;
                    String title = annot.getTitlePopup();
                    String subject = annot.getSubject();
                    String contents = annot.getContents();
                    // TODO: maybe also annot.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        handler.startElement("div", "class", "annotation");

                        if (title != null) {
                            handler.startElement("div", "class", "annotationTitle");
                            handler.characters(title);
                            handler.endElement("div");
                        }

                        if (subject != null) {
                            handler.startElement("div", "class", "annotationSubject");
                            handler.characters(subject);
                            handler.endElement("div");
                        }

                        if (contents != null) {
                            handler.startElement("div", "class", "annotationContents");
                            handler.characters(contents);
                            handler.endElement("div");
                        }

                        handler.endElement("div");
                    }
                }
            }
        }
        handler.endElement("div");
    } catch (SAXException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    }
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;//from   ww  w.j  av  a 2 s . co  m

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    PrintWriter writer = new PrintWriter(dic_file);

    ImageLocationListener imageLocationsListener = new ImageLocationListener();
    AnnotationMaker annotMaker = new AnnotationMaker();

    try {
        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            List<String> comments = new ArrayList<String>();
            List<String> highlightTexts = new ArrayList<String>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);
                    comments.add(comment);
                    highlightTexts.add(highlightText);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                        ;
                    }
                }
            }
            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);

            int i = 0;
            for (PDRectangle pdRect : rects) {
                String comment = comments.get(i);
                String highlightText = highlightTexts.get(i);
                //annotations.add(linkAnnotation(pdRect, comment, highlightText));
                //annotations.add(annotationSquareCircle(pdRect, BLUE));
                s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                        pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);

                i++;
            }
            canvas.close();
        }
        writer.close();
        document.save(new_job);

    } finally {
        if (document != null) {
            document.close();
        }

    }

}

From source file:net.padaf.preflight.helpers.PagesValidationHelper.java

License:Apache License

/**
 * /*from   w ww .j ava 2 s.  c o m*/
 * @param page
 * @param handler
 * @param result
 * @return
 * @throws ValidationException
 */
protected boolean validateAnnotation(PDPage page, DocumentHandler handler, List<ValidationError> result)
        throws ValidationException {
    try {
        List<?> lAnnots = page.getAnnotations();
        for (Object object : lAnnots) {
            if (object instanceof PDAnnotation) {

                COSDictionary cosAnnot = ((PDAnnotation) object).getDictionary();
                AnnotationValidator validator = this.annotFact.getAnnotationValidator(cosAnnot, handler,
                        result);
                if (validator != null) {
                    return validator.validate(result);
                }

            }
        }

    } catch (IOException e) {
        throw new ValidationException("Unable to access Annotation", e);
    }
    // --- No annotations, validation OK
    return true;
}