Example usage for org.apache.pdfbox.pdmodel.common PDRectangle getLowerLeftX

Introduction

This page collects example usages of the getLowerLeftX() method of org.apache.pdfbox.pdmodel.common.PDRectangle.

Prototype

public float getLowerLeftX()

Source Link

Document

This will get the lower left x coordinate.

Usage

From source file:de.berber.kindle.annotator.lib.Comment.java

License:Apache License

/**
 * Builds a PDF text annotation carrying this comment's text and colour.
 * <p>
 * The annotation rectangle is degenerate (zero width and height): all four edges are set to
 * a single anchor point inside the page's trim box. The x position is measured from the left
 * edge of the trim box and the y position downwards from its top edge, each scaled by the
 * respective position factor (presumably a fraction between 0 and 1 - confirm against the
 * code that sets the factors).
 *
 * @param documentOutline the document outline; not used by this implementation
 * @param page the page whose trim box defines the coordinate frame
 * @return the newly created text annotation
 */
@Override
protected PDAnnotation toPDAnnotation(final @Nonnull PDDocumentOutline documentOutline,
        final @Nonnull PDPage page) {
    LOG.info("Creating annotation " + xPositionFactor + "/" + yPositionFactor + " -> " + text);

    // Text note with the comment's contents and background colour.
    final PDGamma background = getColor();
    final PDAnnotationText note = new PDAnnotationText();
    note.setContents(getText());
    note.setColour(background);

    // Anchor point: offset from the left edge, and down from the top edge, of the trim box.
    final PDRectangle box = page.getTrimBox();
    final float anchorX = (float) (box.getLowerLeftX()
            + xPositionFactor * (box.getUpperRightX() - box.getLowerLeftX()));
    final float anchorY = (float) (box.getUpperRightY()
            - yPositionFactor * (box.getUpperRightY() - box.getLowerLeftY()));

    // Collapse the rectangle onto the anchor point.
    final PDRectangle position = new PDRectangle();
    position.setLowerLeftX(anchorX);
    position.setUpperRightX(anchorX);
    position.setUpperRightY(anchorY);
    position.setLowerLeftY(anchorY);
    note.setRectangle(position);

    return note;
}

From source file:de.berber.kindle.annotator.lib.Marking.java

License:Apache License

/**
 * Builds a highlight (text markup) annotation covering the marked area of the page.
 * <p>
 * The four position factors are scaled by the size of the page's trim box; x is measured
 * from the left edge and y downwards from the top edge. When the upper and lower y factors
 * are equal, 0.025 is added to the lower factor so that the rectangle gets a non-zero height.
 *
 * @param documentOutline the document outline; not used by this implementation
 * @param page the page whose trim box defines the coordinate frame
 * @return the newly created highlight annotation
 */
@Override
protected PDAnnotation toPDAnnotation(final PDDocumentOutline documentOutline, final PDPage page) {
    LOG.info("Creating marking " + leftXPositionFactor + "/" + lowerYPositionFactor + " -> "
            + rightXPositionFactor + "/" + upperYPositionFactor);

    // Highlight annotation with this marking's colour and opacity.
    final PDGamma highlightColour = getColor();
    final PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(
            PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
    highlight.setColour(highlightColour);
    highlight.setConstantOpacity(opacity);

    // Attach the comment text when the marking has one.
    if (comment != null) {
        highlight.setContents(comment.getText());
    }

    // Frame of reference: the page's trim box.
    final PDRectangle box = page.getTrimBox();
    final float boxLeft = box.getLowerLeftX();
    final float boxTop = box.getUpperRightY();
    final float boxWidth = box.getUpperRightX() - boxLeft;
    final float boxHeight = boxTop - box.getLowerLeftY();

    // A marking whose upper and lower factors coincide is pushed down a little at the bottom.
    final double flatMarkPadding = (upperYPositionFactor - lowerYPositionFactor == 0.0) ? 0.025 : 0.00;

    final PDRectangle position = new PDRectangle();
    position.setLowerLeftX((float) (boxLeft + leftXPositionFactor * boxWidth));
    position.setUpperRightX((float) (boxLeft + rightXPositionFactor * boxWidth));
    position.setLowerLeftY((float) (boxTop - (lowerYPositionFactor + flatMarkPadding) * boxHeight));
    position.setUpperRightY((float) (boxTop - (upperYPositionFactor) * boxHeight));
    highlight.setRectangle(position);

    // Quad points in the order top-left, top-right, bottom-left, bottom-right. This is not the
    // anti-clockwise order one might expect; per the original author it is what Acrobat 7
    // writes and displays properly.
    final float left = position.getLowerLeftX();
    final float right = position.getUpperRightX();
    final float top = position.getUpperRightY();
    final float bottom = position.getLowerLeftY();
    final float[] quads = {
        left, top,      // x1, y1
        right, top,     // x2, y2
        left, bottom,   // x3, y3
        right, bottom   // x4, y4
    };
    highlight.setQuadPoints(quads);

    return highlight;
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.pdf.PdfLayoutEventStripper.java

License:Apache License

/**
 * Adds a text position to the list of characters to be written out and files it under an
 * article division. When duplicate suppression is enabled, a position is dropped if a
 * position with the same character string has already been recorded at (nearly) the same
 * coordinates, or if it carries no character data at all.
 *
 * @param text
 *            The description of the character to display.
 */
@Override
protected void processTextPosition(final TextPosition text) {
    boolean showCharacter = true;
    if (suppressDuplicateOverlappingText) {
        showCharacter = false;
        final String textCharacter = text.getCharacter();
        // A position without character data is never shown. Check this before the tolerance
        // is computed: textCharacter.length() used to be evaluated first and threw a
        // NullPointerException, which made the later null checks unreachable.
        if (textCharacter != null) {
            final float textX = text.getX();
            final float textY = text.getY();
            List<TextPosition> sameTextCharacters = characterListMapping.get(textCharacter);
            if (sameTextCharacters == null) {
                sameTextCharacters = new ArrayList<TextPosition>();
                characterListMapping.put(textCharacter, sameTextCharacters);
            }

            // Two positions count as "the same place" when both coordinates differ by no
            // more than a third of the average width of one character of this text run.
            // The slack allows for kerning and for writers that apply large padding and
            // then back up again after each character.
            final float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f;

            // The list is keyed by the character string, so only coordinates are compared.
            // Stop scanning at the first overlap; further matches cannot change the result.
            boolean suppressCharacter = false;
            for (int i = 0; i < sameTextCharacters.size() && !suppressCharacter; i++) {
                final TextPosition character = sameTextCharacters.get(i);
                if (character.getCharacter() != null
                        && within(character.getX(), textX, tolerance)
                        && within(character.getY(), textY, tolerance)) {
                    suppressCharacter = true;
                }
            }
            if (!suppressCharacter && textCharacter.length() > 0) {
                sameTextCharacters.add(text);
                showCharacter = true;
            }
        }
    }

    if (showCharacter) {
        // if we are showing the character then we need to determine which
        // article it belongs to.
        int foundArticleDivisionIndex = -1;
        int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
        int notFoundButFirstLeftArticleDivisionIndex = -1;
        int notFoundButFirstAboveArticleDivisionIndex = -1;
        final float x = text.getX();
        final float y = text.getY();
        if (shouldSeparateByBeads) {
            // Bead i maps to division 2*i + 1 when it contains the point and to division 2*i
            // when the point lies before it (presumably matching how charactersByArticle is
            // laid out - confirm against the code that fills it). The scan ends at the
            // first containing bead; a null bead selects division 0.
            for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) {
                final PDThreadBead bead = pageArticles.get(i);
                if (bead != null) {
                    final PDRectangle rect = bead.getRectangle();
                    if (rect.contains(x, y)) {
                        foundArticleDivisionIndex = i * 2 + 1;
                    } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
                            && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) {
                        notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2;
                    } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) {
                        notFoundButFirstLeftArticleDivisionIndex = i * 2;
                    } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) {
                        notFoundButFirstAboveArticleDivisionIndex = i * 2;
                    }
                } else {
                    foundArticleDivisionIndex = 0;
                }
            }
        } else {
            foundArticleDivisionIndex = 0;
        }

        // Preference order: containing bead, then the three fallbacks, then the last division.
        int articleDivisionIndex = -1;
        if (foundArticleDivisionIndex != -1) {
            articleDivisionIndex = foundArticleDivisionIndex;
        } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
        } else if (notFoundButFirstLeftArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
        } else if (notFoundButFirstAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
        } else {
            articleDivisionIndex = charactersByArticle.size() - 1;
        }
        final List<TextPosition> textList = charactersByArticle.get(articleDivisionIndex);
        textList.add(text);
    }
}

From source file:edu.isi.bmkeg.lapdf.extraction.LAPDFTextStripper.java

License:Apache License

/**
 * Handles one TextPosition: optionally discards it when the same character string has
 * already been seen at (nearly) the same coordinates, selects the article division it
 * belongs to, and appends it to that division's list, folding a diacritic into its base
 * character when the two are consecutive entries.
 *
 * @param text The text to process.
 */
protected void processTextPosition(TextPosition text) {
    if (suppressDuplicateOverlappingText) {
        String glyph = text.getCharacter();
        float glyphX = text.getX();
        float glyphY = text.getY();

        // Per character string: x coordinate -> set of y coordinates already recorded.
        TreeMap<Float, TreeSet<Float>> seenByX = characterListMapping.get(glyph);
        if (seenByX == null) {
            seenByX = new TreeMap<Float, TreeSet<Float>>();
            characterListMapping.put(glyph, seenByX);
        }

        // Slack of one third of the average width per character; it allows for kerning and
        // for writers that apply large padding and then back up after each character.
        float slack = (text.getWidth() / glyph.length()) / 3.0f;

        // Look for an earlier occurrence inside the slack window around (glyphX, glyphY).
        boolean duplicate = false;
        for (TreeSet<Float> seenYs : seenByX.subMap(glyphX - slack, glyphX + slack).values()) {
            if (!seenYs.subSet(glyphY - slack, glyphY + slack).isEmpty()) {
                duplicate = true;
                break;
            }
        }
        if (duplicate) {
            // Already rendered at this spot: nothing more to do.
            return;
        }

        // First occurrence here: remember its coordinates.
        TreeSet<Float> ysAtX = seenByX.get(glyphX);
        if (ysAtX == null) {
            ysAtX = new TreeSet<Float>();
            seenByX.put(glyphX, ysAtX);
        }
        ysAtX.add(glyphY);
    }

    // The character will be shown, so work out which article division it belongs to.
    float x = text.getX();
    float y = text.getY();
    int containing = -1;
    int firstLeftOrAbove = -1;
    int firstLeft = -1;
    int firstAbove = -1;
    if (!shouldSeparateByBeads) {
        containing = 0;
    } else {
        // The loop condition ends the scan as soon as a division has been found.
        for (int beadIdx = 0; beadIdx < pageArticles.size() && containing == -1; beadIdx++) {
            PDThreadBead bead = (PDThreadBead) pageArticles.get(beadIdx);
            if (bead == null) {
                containing = 0;
                continue;
            }
            PDRectangle area = bead.getRectangle();
            if (area.contains(x, y)) {
                containing = beadIdx * 2 + 1;
                continue;
            }
            boolean leftOfBead = x < area.getLowerLeftX();
            boolean belowBeadTop = y < area.getUpperRightY();
            if ((leftOfBead || belowBeadTop) && firstLeftOrAbove == -1) {
                firstLeftOrAbove = beadIdx * 2;
            } else if (leftOfBead && firstLeft == -1) {
                firstLeft = beadIdx * 2;
            } else if (belowBeadTop && firstAbove == -1) {
                firstAbove = beadIdx * 2;
            }
        }
    }

    // Preference order: containing bead, then the fallbacks, then the last division.
    int division = containing != -1 ? containing
            : firstLeftOrAbove != -1 ? firstLeftOrAbove
            : firstLeft != -1 ? firstLeft
            : firstAbove != -1 ? firstAbove
            : charactersByArticle.size() - 1;

    List<TextPosition> bucket = (List<TextPosition>) charactersByArticle.get(division);

    /* Some PDFs put diacritics (accents on top of characters) into a separate Tj element.
     * Rendered graphically the two chunks simply overlay; for text output the overlay has
     * to be done here, by recombining the diacritic with its character when the two are
     * consecutive. Only the immediately preceding entry is examined, which may not always
     * be sufficient.
     */
    if (bucket.isEmpty()) {
        bucket.add(text);
        return;
    }
    int lastIdx = bucket.size() - 1;
    TextPosition previous = bucket.get(lastIdx);
    if (text.isDiacritic() && previous.contains(text)) {
        // The new entry is the diacritic: fold it into the entry already in the list.
        previous.mergeDiacritic(text, normalize);
        return;
    }
    if (previous.isDiacritic() && text.contains(previous)) {
        // The previous entry was the diacritic: fold it into this one and take its place.
        text.mergeDiacritic(previous, normalize);
        bucket.remove(lastIdx);
    }
    bucket.add(text);
}

From source file:helper.pdfpreprocessing.pdf.TextHighlight.java

License:Apache License

/**
 * Marks every bounding box of a match on the page.
 * <p>
 * For each box, depending on the arguments:
 * <ul>
 * <li>empty {@code comment}: a filled rectangle of the given height is drawn;</li>
 * <li>{@code withId}: the match string is written at the box's upper right corner;</li>
 * <li>non-empty {@code comment}, not {@code commentOnly}: a highlight annotation carrying the
 * comment is added to the page;</li>
 * <li>non-empty {@code comment} and {@code commentOnly}: the comment is appended to existing
 * annotations whose parenthesised id equals the match string.</li>
 * </ul>
 * Marks are at least 10 units wide.
 *
 * @param color fill colour and annotation colour
 * @param contentStream stream the rectangles and id text are drawn on
 * @param markingMatch the match; its positions and its string are used
 * @param height height of the filled rectangle
 * @param withId whether to print the match string next to each box
 * @param page page that receives (or already holds) the annotations
 * @param comment comment text; must not be null
 * @param commentOnly whether to extend existing annotations instead of adding new ones
 * @return {@code true} if the match has at least one bounding box, {@code false} otherwise
 * @throws IOException if writing to the content stream or reading the annotations fails
 */
private boolean markupMatch(Color color, PDPageContentStream contentStream, Match markingMatch, int height,
        boolean withId, PDPage page, String comment, boolean commentOnly) throws IOException {
    final List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(markingMatch.positions);
    if (textBoundingBoxes.isEmpty()) {
        return false;
    }

    contentStream.setNonStrokingColor(color);
    for (PDRectangle textBoundingBox : textBoundingBoxes) {
        // Width of the mark, with a floor of 10 units.
        final float markWidth = Math
                .max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10);

        if (comment.isEmpty()) {
            contentStream.addRect(textBoundingBox.getLowerLeftX(), textBoundingBox.getLowerLeftY(), markWidth,
                    height);
            contentStream.fill();
        }
        if (withId) {
            PDFont font = PDType1Font.HELVETICA;
            contentStream.beginText();
            contentStream.setFont(font, 5);
            contentStream.newLineAtOffset(textBoundingBox.getUpperRightX(),
                    textBoundingBox.getUpperRightY());
            contentStream.showText(markingMatch.str);
            contentStream.endText();
        }
        if (comment.isEmpty()) {
            continue;
        }

        if (!commentOnly) {
            PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(
                    PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
            // The annotation rectangle is always 10 units high, independent of 'height'.
            PDRectangle position = new PDRectangle();
            position.setLowerLeftX(textBoundingBox.getLowerLeftX());
            position.setLowerLeftY(textBoundingBox.getLowerLeftY());
            position.setUpperRightX(textBoundingBox.getLowerLeftX() + markWidth);
            position.setUpperRightY(textBoundingBox.getLowerLeftY() + 10);
            txtMark.setRectangle(position);

            // Quad points: the rectangle shifted down by 2 units, in the order
            // top-left, top-right, bottom-left, bottom-right.
            float[] quads = new float[8];
            quads[0] = position.getLowerLeftX(); // x1
            quads[1] = position.getUpperRightY() - 2; // y1
            quads[2] = position.getUpperRightX(); // x2
            quads[3] = quads[1]; // y2
            quads[4] = quads[0]; // x3
            quads[5] = position.getLowerLeftY() - 2; // y3
            quads[6] = quads[2]; // x4
            quads[7] = quads[5]; // y4
            txtMark.setQuadPoints(quads);
            txtMark.setConstantOpacity(0.5f);
            txtMark.setContents("Missing Assumption/s (" + markingMatch.str + "):\n" + comment);
            float[] colorArray = new float[] { 0, 0, 0 };
            colorArray = color.getColorComponents(colorArray);
            PDColor hColor = new PDColor(colorArray, PDDeviceRGB.INSTANCE);
            txtMark.setColor(hColor);
            txtMark.setCreationDate(Calendar.getInstance());
            txtMark.setTitlePopup("Assumption Error");
            page.getAnnotations().add(txtMark);
        } else {
            for (int i = 0; i < page.getAnnotations().size(); i++) {
                String extractedComment = page.getAnnotations().get(i).getContents();
                if (extractedComment == null) {
                    continue;
                }
                // The id is the text between the first '(' and the next ')'. Annotations
                // without such a pair (e.g. ones not created by this class) are skipped;
                // calling substring() on them used to throw StringIndexOutOfBoundsException.
                final int open = extractedComment.indexOf('(');
                final int close = open < 0 ? -1 : extractedComment.indexOf(')', open + 1);
                if (close < 0) {
                    continue;
                }
                String commentID = extractedComment.substring(open + 1, close);
                // NOTE(review): the comment is appended only when the annotation already
                // contains it - confirm that this condition is not meant to be negated.
                if (markingMatch.str.equals(commentID) && extractedComment.contains(comment)) {
                    page.getAnnotations().get(i).setContents(extractedComment + "\n" + comment);
                }
            }
        }
    }
    return true;
}

From source file:hightlighting.PDFTextAnnotator.java

License:Apache License

/**
 * Flattens a rectangle into the eight quad-point coordinates of its corners, in the order
 * top-left, bottom-left, top-right, bottom-right (x before y for each corner).
 *
 * @param rect the rectangle to convert
 * @return a new array {x1, y1, x2, y2, x3, y3, x4, y4}
 */
private float[] computeQuads(PDRectangle rect) {
    final float left = rect.getLowerLeftX();
    final float top = rect.getUpperRightY();
    final float bottom = rect.getLowerLeftY();
    final float right = rect.getUpperRightX();

    return new float[] {
        left, top,      // top left:     x1, y1
        left, bottom,   // bottom left:  x2, y2
        right, top,     // top right:    x3, y3
        right, bottom   // bottom right: x4, y4
    };
}

From source file:javaexample.RadialTextPdf.java

License:Open Source License

/**
 * Appends a page to the document and draws the word "PDF" radially around the page centre,
 * once every 7.5 degrees, each with a line from the centre and a hue derived from the angle.
 * <p>
 * The content stream is closed even when drawing fails, so a failure does not leave the
 * stream open.
 *
 * @param document the document the new page is added to
 * @throws IOException if writing to the page content stream fails
 */
private void generatePage(PDDocument document) throws IOException {
    // Creates a new page.
    PDPage page = new PDPage(pageRect);
    document.addPage(page);

    // Gets boundings of the page.
    PDRectangle rect = page.getMediaBox();

    // Calculates the side of the square that fits into the page.
    float squareSide = Math.min(rect.getWidth(), rect.getHeight());

    // Calculates the center point of the page.
    float centerX = (rect.getLowerLeftX() + rect.getUpperRightX()) / 2;
    float centerY = (rect.getLowerLeftY() + rect.getUpperRightY()) / 2;

    // Creates the font for the radial text.
    PDFont font = PDType1Font.HELVETICA_BOLD; // Standard font
    float fontSize = squareSide / 30;
    float fontAscent = font.getFontDescriptor().getAscent() / 1000 * fontSize;

    // Calculates key values for the drawings.
    float textX = squareSide / 3.4F; // x of the text.
    float textY = -fontAscent / 2; // y of the text (for vertical centering of text).
    float lineToX = textX * 0.97F; // x destination for the line.
    float lineWidth = squareSide / 900; // width of lines.

    PDPageContentStream cos = new PDPageContentStream(document, page);
    try {
        // Moves the origin (0,0) of the axes to the center of the page.
        cos.concatenate2CTM(AffineTransform.getTranslateInstance(centerX, centerY));

        // 7.5 is exactly representable as a float, so the loop runs for 0, 7.5, ..., 352.5.
        for (float degrees = 0; degrees < 360; degrees += 7.5) {
            double radians = degrees2Radians(degrees);

            // Creates a pure color with the hue based on the angle.
            Color textColor = Color.getHSBColor(degrees / 360.0F, 1, 1);

            // Saves the graphics state because the angle changes on each iteration.
            cos.saveGraphicsState();

            // Rotates the axes by the angle expressed in radians.
            cos.concatenate2CTM(AffineTransform.getRotateInstance(radians));

            // Draws a line from the center of the page.
            cos.setLineWidth(lineWidth);
            cos.moveTo(0, 0);
            cos.lineTo(lineToX, 0);
            cos.stroke();

            // Draws the radial text.
            cos.beginText();
            cos.setNonStrokingColor(textColor);
            cos.setFont(font, fontSize);
            cos.moveTextPositionByAmount(textX, textY);
            cos.drawString("PDF");
            cos.endText();

            // Restores the graphics state to remove rotation transformation.
            cos.restoreGraphicsState();
        }
    } finally {
        // Always release the stream, also when one of the drawing calls throws.
        cos.close();
    }
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Processes {@code <job>.pdf}: records image locations and annotation contents to
 * {@code <job>-dict.txt}, adds square annotations for images, draws a box around every
 * text-markup annotation, and saves the result as {@code <job>-new.pdf}.
 * <p>
 * The dictionary writer, the per-page content stream and the document are closed on every
 * exit path, including failures.
 *
 * @param job path of the job without the {@code .pdf} extension
 * @param pA eight adjustment values: {@code pA[0..3]} are passed to {@code adjustedRect} for
 *            the text-extraction regions, {@code pA[4..7]} for the recorded/drawn rectangles
 * @throws IOException if the PDF cannot be read, processed or written
 */
public void doJob(String job, Float[] pA) throws IOException {
    final Stamper s = new Stamper(); // utility class

    final String jobFile = job + ".pdf";
    final String dictFile = job + "-dict.txt";
    final String newJob = job + "-new.pdf";

    final ImageLocationListener imageLocationsListener = new ImageLocationListener();
    final AnnotationMaker annotMaker = new AnnotationMaker();

    PDDocument document = null;
    final PrintWriter writer = new PrintWriter(dictFile);
    try {
        document = PDDocument.load(new File(jobFile));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // Register one text-extraction region per markup/link annotation; the region
            // name is a running counter.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // Links were given a region name above as well. Skip it so that the
                    // counter stays aligned; otherwise every markup after a link would be
                    // paired with the text of the wrong region.
                    j++;

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    // A text note whose lower left corner lies on an image is recorded
                    // against that image.
                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                }
            }

            // Outline every markup rectangle on the page itself.
            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                canvas.close();
            }
        }
        document.save(newJob);

    } finally {
        // Flushes and closes the dictionary file on success and on failure alike.
        writer.close();
        if (document != null) {
            document.close();
        }
    }
}

From source file:onyx.core.parser.PDFTextStripper.java

License:Apache License

/**
 * This will process a TextPosition object and add the
 * text to the list of characters on a page.  It takes care of
 * overlapping text./* w w w  .ja v a 2 s. c om*/
 *
 * @param text The text to process.
 */
protected void processTextPosition(TextPosition text) {
    boolean showCharacter = true;
    if (suppressDuplicateOverlappingText) {
        showCharacter = false;
        String textCharacter = text.getCharacter();
        float textX = text.getX();
        float textY = text.getY();
        TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping.get(textCharacter);
        if (sameTextCharacters == null) {
            sameTextCharacters = new TreeMap<Float, TreeSet<Float>>();
            characterListMapping.put(textCharacter, sameTextCharacters);
        }

        // RDD - Here we compute the value that represents the end of the rendered
        // text.  This value is used to determine whether subsequent text rendered
        // on the same line overwrites the current text.
        //
        // We subtract any positive padding to handle cases where extreme amounts
        // of padding are applied, then backed off (not sure why this is done, but there
        // are cases where the padding is on the order of 10x the character width, and
        // the TJ just backs up to compensate after each character).  Also, we subtract
        // an amount to allow for kerning (a percentage of the width of the last
        // character).
        //
        boolean suppressCharacter = false;
        float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f;

        SortedMap<Float, TreeSet<Float>> xMatches = sameTextCharacters.subMap(textX - tolerance,
                textX + tolerance);
        for (TreeSet<Float> xMatch : xMatches.values()) {
            SortedSet<Float> yMatches = xMatch.subSet(textY - tolerance, textY + tolerance);
            if (!yMatches.isEmpty()) {
                suppressCharacter = true;
                break;
            }
        }

        if (!suppressCharacter) {
            TreeSet<Float> ySet = sameTextCharacters.get(textX);
            if (ySet == null) {
                ySet = new TreeSet<Float>();
                sameTextCharacters.put(textX, ySet);
            }
            ySet.add(textY);
            showCharacter = true;
        }
    }

    if (showCharacter) {
        //if we are showing the character then we need to determine which
        //article it belongs to.
        int foundArticleDivisionIndex = -1;
        int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
        int notFoundButFirstLeftArticleDivisionIndex = -1;
        int notFoundButFirstAboveArticleDivisionIndex = -1;
        float x = text.getX();
        float y = text.getY();
        if (shouldSeparateByBeads) {
            for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) {
                PDThreadBead bead = (PDThreadBead) pageArticles.get(i);
                if (bead != null) {
                    PDRectangle rect = bead.getRectangle();
                    if (rect.contains(x, y)) {
                        foundArticleDivisionIndex = i * 2 + 1;
                    } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
                            && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) {
                        notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2;
                    } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) {
                        notFoundButFirstLeftArticleDivisionIndex = i * 2;
                    } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) {
                        notFoundButFirstAboveArticleDivisionIndex = i * 2;
                    }
                } else {
                    foundArticleDivisionIndex = 0;
                }
            }
        } else {
            foundArticleDivisionIndex = 0;
        }
        int articleDivisionIndex = -1;
        if (foundArticleDivisionIndex != -1) {
            articleDivisionIndex = foundArticleDivisionIndex;
        } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
        } else if (notFoundButFirstLeftArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
        } else if (notFoundButFirstAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
        } else {
            articleDivisionIndex = charactersByArticle.size() - 1;
        }

        List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex);

        /* In the wild, some PDF encoded documents put diacritics (accents on
         * top of characters) into a separate Tj element.  When displaying them
         * graphically, the two chunks get overlayed.  With text output though,
         * we need to do the overlay. This code recombines the diacritic with
         * its associated character if the two are consecutive.
         */
        if (textList.isEmpty()) {
            textList.add(text);
        } else {
            /* test if we overlap the previous entry.  
             * Note that we are making an assumption that we need to only look back
             * one TextPosition to find what we are overlapping.  
             * This may not always be true. */
            TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1);
            if (text.isDiacritic() && previousTextPosition.contains(text)) {
                previousTextPosition.mergeDiacritic(text, normalize);
            }
            /* If the previous TextPosition was the diacritic, merge it into this
             * one and remove it from the list. */
            else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) {
                text.mergeDiacritic(previousTextPosition, normalize);
                textList.remove(textList.size() - 1);
                textList.add(text);
            } else {
                textList.add(text);
            }
        }
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Creates a stream (from FOP's PDF library) from a PDF page parsed with PDFBox.
 * @param sourceDoc the source PDF the given page to be copied belongs to
 * @param page the page to transform into a stream
 * @param key value to use as key for the stream
 * @param atdoc adjustment for stream
 * @param fontinfo fonts
 * @param pos rectangle
 * @return the stream
 * @throws IOException if an I/O error occurs
 */
public String createStreamFromPDFBoxPage(PDDocument sourceDoc, PDPage page, String key, AffineTransform atdoc,
        FontInfo fontinfo, Rectangle pos) throws IOException {
    handleAnnotations(sourceDoc, page, atdoc);
    if (pageNumbers.containsKey(targetPage.getPageIndex())) {
        pageNumbers.get(targetPage.getPageIndex()).set(0, targetPage.makeReference());
    }
    PDResources sourcePageResources = page.getResources();
    PDStream pdStream = getContents(page);

    COSDictionary fonts = (COSDictionary) sourcePageResources.getCOSObject().getDictionaryObject(COSName.FONT);
    COSDictionary fontsBackup = null;
    UniqueName uniqueName = new UniqueName(key, sourcePageResources);
    String newStream = null;
    if (fonts != null && pdfDoc.isMergeFontsEnabled()) {
        fontsBackup = new COSDictionary(fonts);
        MergeFontsPDFWriter m = new MergeFontsPDFWriter(fonts, fontinfo, uniqueName, parentFonts, currentMCID);
        newStream = m.writeText(pdStream);
        //            if (newStream != null) {
        //                for (Object f : fonts.keySet().toArray()) {
        //                    COSDictionary fontdata = (COSDictionary)fonts.getDictionaryObject((COSName)f);
        //                    if (getUniqueFontName(fontdata) != null) {
        //                        fonts.removeItem((COSName)f);
        //                    }
        //                }
        //            }
    }
    if (newStream == null) {
        PDFWriter writer = new PDFWriter(uniqueName, currentMCID);
        newStream = writer.writeText(pdStream);
        currentMCID = writer.getCurrentMCID();

    }
    pdStream = new PDStream(sourceDoc, new ByteArrayInputStream(newStream.getBytes("ISO-8859-1")));
    mergeXObj(sourcePageResources.getCOSObject(), fontinfo, uniqueName);
    PDFDictionary pageResources = (PDFDictionary) cloneForNewDocument(sourcePageResources.getCOSObject());

    PDFDictionary fontDict = (PDFDictionary) pageResources.get("Font");
    if (fontDict != null && pdfDoc.isMergeFontsEnabled()) {
        for (Map.Entry<String, Typeface> fontEntry : fontinfo.getUsedFonts().entrySet()) {
            Typeface font = fontEntry.getValue();
            if (font instanceof FOPPDFFont) {
                FOPPDFFont pdfFont = (FOPPDFFont) font;
                if (pdfFont.getRef() == null) {
                    pdfFont.setRef(new PDFDictionary());
                    pdfDoc.assignObjectNumber(pdfFont.getRef());
                }
                fontDict.put(fontEntry.getKey(), pdfFont.getRef());
            }
        }
    }
    updateXObj(sourcePageResources.getCOSObject(), pageResources);
    if (fontsBackup != null) {
        sourcePageResources.getCOSObject().setItem(COSName.FONT, fontsBackup);
    }

    COSStream originalPageContents = pdStream.getCOSObject();

    bindOptionalContent(sourceDoc);

    PDFStream pageStream;
    Set filter;
    //        if (originalPageContents instanceof COSStreamArray) {
    //            COSStreamArray array = (COSStreamArray)originalPageContents;
    //            pageStream = new PDFStream();
    //            InputStream in = array.getUnfilteredStream();
    //            OutputStream out = pageStream.getBufferOutputStream();
    //            IOUtils.copyLarge(in, out);
    //            filter = FILTER_FILTER;
    //        } else {
    pageStream = (PDFStream) cloneForNewDocument(originalPageContents);
    filter = Collections.EMPTY_SET;
    //        }
    if (pageStream == null) {
        pageStream = new PDFStream();
    }
    if (originalPageContents != null) {
        transferDict(originalPageContents, pageStream, filter);
    }

    transferPageDict(fonts, uniqueName, sourcePageResources);

    PDRectangle mediaBox = page.getMediaBox();
    PDRectangle cropBox = page.getCropBox();
    PDRectangle viewBox = cropBox != null ? cropBox : mediaBox;

    //Handle the /Rotation entry on the page dict
    int rotation = PDFUtil.getNormalizedRotation(page);

    //Transform to FOP's user space
    float w = (float) pos.getWidth() / 1000f;
    float h = (float) pos.getHeight() / 1000f;
    if (rotation == 90 || rotation == 270) {
        float tmp = w;
        w = h;
        h = tmp;
    }
    atdoc.setTransform(AffineTransform.getScaleInstance(w / viewBox.getWidth(), h / viewBox.getHeight()));
    atdoc.translate(0, viewBox.getHeight());
    atdoc.rotate(-Math.PI);
    atdoc.scale(-1, 1);
    atdoc.translate(-viewBox.getLowerLeftX(), -viewBox.getLowerLeftY());

    rotate(rotation, viewBox, atdoc);

    StringBuilder boxStr = new StringBuilder();
    boxStr.append(PDFNumber.doubleOut(mediaBox.getLowerLeftX())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getLowerLeftY())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getWidth())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getHeight())).append(" re W n\n");
    return boxStr.toString() + IOUtils.toString(pdStream.createInputStream(null), "ISO-8859-1");
}