Example usage for org.apache.pdfbox.pdmodel.common PDRectangle getUpperRightY

List of usage examples for org.apache.pdfbox.pdmodel.common PDRectangle getUpperRightY

Introduction

On this page you can find usage examples for the getUpperRightY method of org.apache.pdfbox.pdmodel.common.PDRectangle.

Prototype

public float getUpperRightY() 

Source Link

Document

This will get the upper right y coordinate.

Usage

From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java

License:Apache License

private void correctPosition(@NotNull final PDFont fontObj, final byte[] string, final int i,
        @NotNull final String c, final float fontSizeText, final float glyphSpaceToTextSpaceFactor,
        float horizontalScalingText, final int codeLength, @NotNull final ETextPosition text)
        throws IOException {

    /**/*  w  ww . ja v a2s .  c  o m*/
     * Provide precise positioning of glyphs.
     *
     * There are several problems right which needs to be worked around:
     *
     * 1. Sometimes the PDF will make room for a glyph which belongs to a font with
     *      one or more very tall glyphs by jumping up on the page before drawing.
     *   Since most glyphs are (much) shorter than the tallest one, we need to make
     *      up for that by adjusting the Y coordinate back down. The distance which
     *      is jumped up is embedded in the PDF files, so there is no other way to go
     *      about this.
     *
     *  'beforeRoomForGlyph' is the position we were at before the jump back.
     *   Then we need to add spaceOverChar which is my estimate of where the glyph
     *      should begin. the result is kept in 'startY'
     *
     * 2. The default height we get might also be too big, so recalculate that based
     *      on character bounding
     *
     */
    final BoundingBox character = fontObj.getCharacterBoundingBox(string, i, codeLength);
    PDRectangle fontBB = null;

    try {
        fontBB = fontObj.getFontBoundingBox();
    } catch (RuntimeException e) {

        // ignore, this is frequently not implemented
    }

    final Rectangle pos = text.getPos();
    float adjust = (fontSizeText * horizontalScalingText) / glyphSpaceToTextSpaceFactor;

    adjust *= getTextMatrix().getXScale();

    final Rectangle newPos;

    if ((character != null) && (fontBB != null) && (character.getHeight() > 0.0f)
            && (fontBB.getHeight() > 0.0f)) {

        /* remove the upper and lower bounds filtered away by character */
        final float spaceUnderChar = Math.min(fontBB.getLowerLeftY(), character.getLowerLeftY());
        final float spaceOverChar = fontBB.getUpperRightY() - character.getUpperRightY();
        final float fontHeight = fontBB.getHeight();

        /* calculate the upper left corner of the rendered glyph */
        float yStart = pos.endY - adjust * fontHeight;
        yStart += adjust * spaceOverChar;
        yStart -= adjust * spaceUnderChar;
        yStart -= pos.height;

        /* determine start X coordinate. */
        final float x;

        if (isMonoSpacedFont(fontObj)) {
            x = pos.x;
        } else {
            //                float leftOfText = text.getX() - (adjust * fontBB.getWidth());
            //
            //                x = leftOfText + adjust * character.getLowerLeftX();
            x = pos.x;
        }

        /*
         *  It was much easier to write the word segmentation code with full font width,
         *   so lets keep that. I havent seen this causing any problems
         */
        float w = pos.width;

        /*
         *  Line segmentation code was obviously much easier by not having any descenders which
         *   can even overlap into the following line. Math symbols need to stay full length
         */
        final float characterHeight;

        if (NO_DESCENDERS && (Character.getType(c.charAt(0)) != (int) Character.MATH_SYMBOL)) {
            characterHeight = character.getUpperRightY();
        } else {
            characterHeight = character.getHeight();
        }

        float h = adjust * (characterHeight);

        /* correct if the NO_DESCENDERS hack made this character have no height*/
        if (NO_DESCENDERS && h < 0.1f) {
            h = pos.height;
        }

        newPos = new Rectangle(x, yStart, w, h);
    } else {

        /*
         *  here we have a lot less information, so keep most of what was calculated. Just offset
         *   the Y coordinate
         */
        float h = pos.height;
        float w = pos.width;
        float startY = pos.y - h;// * 0.8f;

        if (fontObj instanceof PDType3Font) {

            /*
             *  type 3 fonts typically have almost no information
             * try to mitigate the damage by keeping them small.
             */
            h *= 0.5f;
            startY += h; /* this is a _very_ quick and dirty hack */
        }

        newPos = new Rectangle(pos.x, startY, w, h);
    }

    if (log.isTraceEnabled()) {
        log.trace("LOG00730:Text " + c + ", " + "pos from " + pos + " to " + newPos);
    }

    text.setBaseLine(pos.y);
    text.setPos(newPos);
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

/**
 * Loads the PDF on first use and registers one text-extraction region on
 * {@code stripper} for every link annotation found on any page.
 * <p>
 * Does nothing when {@code pdfDoc} has already been loaded.
 *
 * @throws NuxeoException if reading the annotations or creating the stripper fails
 */
protected void loadAndPreflightPdf() throws NuxeoException {

    if (pdfDoc != null) {
        return;
    }

    pdfDoc = PDFUtils.load(pdfBlob, password);

    @SuppressWarnings("unchecked")
    List<PDPage> allPages = pdfDoc.getDocumentCatalog().getAllPages();
    try {
        stripper = new PDFTextStripperByArea();
        for (PDPage page : allPages) {
            List<PDAnnotation> annotations = page.getAnnotations();
            for (int j = 0; j < annotations.size(); j++) {
                PDAnnotation annot = annotations.get(j);
                if (!(annot instanceof PDAnnotationLink)) {
                    continue;
                }

                PDRectangle rect = ((PDAnnotationLink) annot).getRectangle();
                // need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();

                // Only unrotated pages get their Y axis flipped; every other
                // rotation (including 90) keeps the annotation's own Y value.
                if (page.findRotation() == 0) {
                    PDRectangle pageSize = page.findMediaBox();
                    y = pageSize.getHeight() - y;
                }

                // NOTE(review): the region name is the annotation index within its
                // page, so names repeat from page to page on the shared stripper;
                // confirm that the code consuming these regions expects that.
                Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }
    } catch (IOException e) {
        throw new NuxeoException("Cannot preflight and prepare regions", e);
    }
}

From source file:org.olat.core.util.pdf.PdfDocument.java

License:Apache License

/**
 * Starts a new A4 page and makes it the current one.
 * <p>
 * Any content stream that is still open is closed first. Afterwards a fresh
 * content stream is opened on the new page, and the usable {@code width} and the
 * starting {@code currentY} are reset from the page's media box and the margins.
 *
 * @return the page that was just appended to the document
 * @throws IOException if closing the old stream or opening the new one fails
 */
public PDPage addPage() throws IOException {
    final boolean streamStillOpen = currentContentStream != null;
    if (streamStillOpen) {
        currentContentStream.close();
    }

    final PDPage freshPage = new PDPage(PDPage.PAGE_SIZE_A4);
    document.addPage(freshPage);
    currentPage = freshPage;
    currentContentStream = new PDPageContentStream(document, currentPage);

    // Layout state is derived from the media box minus the configured margins.
    final PDRectangle pageBounds = currentPage.findMediaBox();
    final float usableWidth = pageBounds.getWidth() - 2 * marginLeftRight;
    width = usableWidth;
    final float topOfContent = pageBounds.getUpperRightY() - marginTopBottom;
    currentY = topOfContent;

    return freshPage;
}

From source file:org.opencps.util.ExtractTextLocations.java

License:Open Source License

/**
 * Loads the PDF at {@code fullPath}, processes the content stream of its first
 * page and records that page's media box dimensions and corner coordinates.
 * <p>
 * Failures while decrypting or processing are logged and not rethrown; the
 * document is always closed before the constructor returns.
 *
 * @param fullPath path of the PDF file to read
 * @throws IOException if closing the document fails
 */
public ExtractTextLocations(String fullPath) throws IOException {

    PDDocument pdf = null;

    try {
        final File pdfFile = new File(fullPath);
        pdf = PDDocument.load(pdfFile);

        if (pdf.isEncrypted()) {
            // Best effort: try the blank password and carry on if it is rejected.
            try {
                pdf.decrypt(StringPool.BLANK);
            } catch (Exception decryptFailure) {
                _log.error(decryptFailure);
            }
        }

        // ExtractTextLocations printer = new ExtractTextLocations();

        final List pages = pdf.getDocumentCatalog().getAllPages();
        final boolean hasPages = pages != null && pages.size() > 0;

        if (hasPages) {
            final PDPage firstPage = (PDPage) pages.get(0);

            final PDStream pageContents = firstPage.getContents();
            if (pageContents != null) {
                this.processStream(firstPage, firstPage.findResources(),
                        firstPage.getContents().getStream());
            }

            final PDRectangle mediaBox = firstPage.findMediaBox();
            if (mediaBox != null) {
                setPageWidth(mediaBox.getWidth());
                setPageHeight(mediaBox.getHeight());
                setPageLLX(mediaBox.getLowerLeftX());
                setPageURX(mediaBox.getUpperRightX());
                setPageLLY(mediaBox.getLowerLeftY());
                setPageURY(mediaBox.getUpperRightY());
            }
        }
    } catch (Exception failure) {
        _log.error(failure);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * A function to extract embedded URIs from the PDF-document.
 * /*from  w  w  w  . j av a 2  s  .co  m*/
 */
protected void extractURLs(IParserDocument parserDoc, PDDocument pddDoc) throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null)
        return;

    @SuppressWarnings("unchecked")
    final List<PDPage> allPages = pddDocCatalog.getAllPages();
    if (allPages == null || allPages.isEmpty())
        return;

    for (int i = 0; i < allPages.size(); i++) {
        final PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        final PDPage page = (PDPage) allPages.get(i);

        @SuppressWarnings("unchecked")
        final List<PDAnnotation> annotations = page.getAnnotations();
        if (annotations == null || annotations.isEmpty())
            return;

        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = (PDAnnotation) annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                final PDAnnotationLink link = (PDAnnotationLink) annot;
                final PDRectangle rect = link.getRectangle();

                //need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.findRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.findMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    //do nothing
                }

                Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }

        stripper.extractRegions(page);

        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = (PDAnnotation) annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                final PDAnnotationLink link = (PDAnnotationLink) annot;
                final PDAction action = link.getAction();
                final String urlText = stripper.getTextForRegion("" + j);

                if (action instanceof PDActionURI) {
                    final PDActionURI embeddedUri = (PDActionURI) action;
                    final URI temp = URI.create(embeddedUri.getURI());

                    parserDoc.addReference(temp, urlText, Constants.SERVICE_PID + ":" + PID);
                }
            }
        }
    }
}

From source file:org.polarsys.kitalpha.doc.doc2model.tikaparsing.pdf.Doc2ModelTextStripper.java

License:Apache License

/**
 * This will process a TextPosition object and add the
 * text to the list of characters on a page.  It takes care of
 * overlapping text.
 * <p>
 * The raw position is first passed through {@code processStyle} and cast to a
 * {@code StylizedTextPosition}; it is that styled object which is stored.
 *
 * @param rawtext The text to process.
 */
protected void processTextPosition(TextPosition rawtext) {

    StylizedTextPosition text = (StylizedTextPosition) processStyle(rawtext);

    // Stays true unless duplicate suppression is on and a matching earlier
    // character is found at (nearly) the same coordinates.
    boolean showCharacter = true;
    if (suppressDuplicateOverlappingText) {
        showCharacter = false;
        String textCharacter = text.getCharacter();
        float textX = text.getX();
        float textY = text.getY();
        // All positions seen so far for this character string; created on first use.
        List<TextPosition> sameTextCharacters = (List<TextPosition>) characterListMapping.get(textCharacter);
        if (sameTextCharacters == null) {
            sameTextCharacters = new ArrayList<TextPosition>();
            characterListMapping.put(textCharacter, sameTextCharacters);
        }

        // RDD - Here we compute the value that represents the end of the rendered
        // text.  This value is used to determine whether subsequent text rendered
        // on the same line overwrites the current text.
        //
        // We subtract any positive padding to handle cases where extreme amounts
        // of padding are applied, then backed off (not sure why this is done, but there
        // are cases where the padding is on the order of 10x the character width, and
        // the TJ just backs up to compensate after each character).  Also, we subtract
        // an amount to allow for kerning (a percentage of the width of the last
        // character).
        //
        boolean suppressCharacter = false;
        // Tolerance is a third of the average width per character of this text.
        // NOTE(review): textCharacter is dereferenced here although the loop below
        // still guards against it being null; if null is possible this line fails first.
        float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f;
        for (int i = 0; i < sameTextCharacters.size() && textCharacter != null; i++) {
            TextPosition character = sameTextCharacters.get(i);
            String charCharacter = character.getCharacter();
            float charX = character.getX();
            float charY = character.getY();
            //only want to suppress

            if (charCharacter != null &&
            //charCharacter.equals( textCharacter ) &&
                    within(charX, textX, tolerance) && within(charY, textY, tolerance)) {
                suppressCharacter = true;
            }
        }
        if (!suppressCharacter) {
            sameTextCharacters.add(text);
            showCharacter = true;
        }
    }

    if (showCharacter) {
        //if we are showing the character then we need to determine which
        //article it belongs to.
        int foundArticleDivisionIndex = -1;
        int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
        int notFoundButFirstLeftArticleDivisionIndex = -1;
        int notFoundButFirstAboveArticleDivisionIndex = -1;
        float x = text.getX();
        float y = text.getY();
        if (shouldSeparateByBeads) {
            // Bead i maps to index i * 2 + 1 when the text lies inside it, and to
            // index i * 2 when the text is only to its left and/or above it.
            for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) {
                PDThreadBead bead = (PDThreadBead) pageArticles.get(i);
                if (bead != null) {
                    PDRectangle rect = bead.getRectangle();
                    // NOTE(review): the first fallback uses '||', so the two narrower
                    // fallbacks below can only match once the "left and above" index
                    // has already been set; confirm whether '&&' was intended.
                    if (rect.contains(x, y)) {
                        foundArticleDivisionIndex = i * 2 + 1;
                    } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
                            && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) {
                        notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2;
                    } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) {
                        notFoundButFirstLeftArticleDivisionIndex = i * 2;
                    } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) {
                        notFoundButFirstAboveArticleDivisionIndex = i * 2;
                    }
                } else {
                    // A null bead ends the search and selects division 0.
                    foundArticleDivisionIndex = 0;
                }
            }
        } else {
            foundArticleDivisionIndex = 0;
        }
        // Pick the best available match in priority order; fall back to the last
        // article division when nothing matched at all.
        int articleDivisionIndex = -1;
        if (foundArticleDivisionIndex != -1) {
            articleDivisionIndex = foundArticleDivisionIndex;
        } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
        } else if (notFoundButFirstLeftArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
        } else if (notFoundButFirstAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
        } else {
            articleDivisionIndex = charactersByArticle.size() - 1;
        }

        List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex);

        /* In the wild, some PDF encoded documents put diacritics (accents on
         * top of characters) into a separate Tj element.  When displaying them
         * graphically, the two chunks get overlaid.  With text output though,
         * we need to do the overlay. This code recombines the diacritic with
         * its associated character if the two are consecutive.
         */
        if (textList.isEmpty()) {
            textList.add(text);
        } else {
            /* test if we overlap the previous entry.
             * Note that we are making an assumption that we need to only look back
             * one TextPosition to find what we are overlapping.
             * This may not always be true. */
            TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1);
            if (text.isDiacritic() && previousTextPosition.contains(text)) {
                previousTextPosition.mergeDiacritic(text, normalize);
            }
            /* If the previous TextPosition was the diacritic, merge it into this
             * one and remove it from the list. */
            else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) {
                text.mergeDiacritic(previousTextPosition, normalize);
                textList.remove(textList.size() - 1);
                textList.add(text);
            } else {
                textList.add(text);
            }
        }
    }
}

From source file:org.xwiki.test.misc.PDFTest.java

License:Open Source License

/**
 * Builds a map from the trimmed text covered by each link annotation on the page
 * to that link's action. Links sharing the same text overwrite each other, the
 * later annotation winning.
 * <p>
 * Code adapted from http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/PrintURLs.java.html
 *
 * @param page the page whose link annotations are inspected
 * @return the link actions keyed by their visible text
 * @throws Exception if reading annotations or extracting region text fails
 */
private Map<String, PDAction> extractLinks(PDPage page) throws Exception {
    PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
    List<PDAnnotation> pageAnnotations = page.getAnnotations();

    // Pass 1: register one extraction region per link, named by annotation index.
    int regionIndex = 0;
    for (PDAnnotation candidate : pageAnnotations) {
        if (candidate instanceof PDAnnotationLink) {
            PDRectangle bounds = ((PDAnnotationLink) candidate).getRectangle();
            // The link rectangle has to be moved into text space.
            float left = bounds.getLowerLeftX();
            float top = bounds.getUpperRightY();
            float regionWidth = bounds.getWidth();
            float regionHeight = bounds.getHeight();

            // Only unrotated pages get their Y axis flipped.
            if (page.getRotation() == 0) {
                top = page.getMediaBox().getHeight() - top;
            }

            areaStripper.addRegion(String.valueOf(regionIndex),
                    new Rectangle2D.Float(left, top, regionWidth, regionHeight));
        }
        regionIndex++;
    }

    areaStripper.extractRegions(page);

    // Pass 2: look up each region's text and pair it with the link action.
    Map<String, PDAction> actionsByLabel = new HashMap<String, PDAction>();
    regionIndex = 0;
    for (PDAnnotation candidate : pageAnnotations) {
        if (candidate instanceof PDAnnotationLink) {
            String regionText = areaStripper.getTextForRegion(String.valueOf(regionIndex)).trim();
            actionsByLabel.put(regionText, ((PDAnnotationLink) candidate).getAction());
        }
        regionIndex++;
    }

    return actionsByLabel;
}

From source file:paper2ebook.Transformer.java

License:Apache License

/**
 * Heuristic search of the list of interesting areas in page, returned by
 * natural read order.
 * <p>
 * The crop box is split into four quadrants around its centre, returned as
 * top left, bottom left, top right, bottom right. The centre is measured from
 * the crop box's own lower-left corner, so the quadrants tile the box exactly
 * even when it does not start at the origin.
 *
 * @param page the page whose crop box is split
 * @return the four quadrant rectangles in reading order
 */
public List<PDRectangle> getFragments(PDPage page) {
    List<PDRectangle> fragments = new ArrayList<PDRectangle>();

    // TODO: naive 2 columns hack: rewrite me to introspect the document
    // structure instead

    PDRectangle origBox = page.findCropBox();
    float left = origBox.getLowerLeftX();
    float bottom = origBox.getLowerLeftY();
    float right = origBox.getUpperRightX();
    float top = origBox.getUpperRightY();

    // Halving the upper-right coordinates alone is only the centre when the box
    // starts at (0, 0); offsetting from the lower-left corner works for any box.
    float midX = left + origBox.getWidth() / 2;
    float midY = bottom + origBox.getHeight() / 2;

    fragments.add(fragmentBox(left, midY, midX, top)); // top left
    fragments.add(fragmentBox(left, bottom, midX, midY)); // bottom left
    fragments.add(fragmentBox(midX, midY, right, top)); // top right
    fragments.add(fragmentBox(midX, bottom, right, midY)); // bottom right

    return fragments;
}

/** Creates a rectangle from its lower-left and upper-right corners. */
private static PDRectangle fragmentBox(float lowerLeftX, float lowerLeftY, float upperRightX,
        float upperRightY) {
    PDRectangle box = new PDRectangle();
    box.setLowerLeftX(lowerLeftX);
    box.setLowerLeftY(lowerLeftY);
    box.setUpperRightX(upperRightX);
    box.setUpperRightY(upperRightY);
    return box;
}

From source file:PDF.RotatePDF.java

/**
 * Applies {@code at} to the page content, using the centre of the crop box as the
 * fixed point of the transformation.
 * <p>
 * The transformation is written into a newly added content stream. When the page
 * contents are a stream array, that new (last) stream is moved to the front so
 * that it takes effect before the existing content.
 *
 * @param document the document owning the page
 * @param page the page to transform
 * @param at the transformation to apply around the crop box centre
 * @throws IOException if writing the content stream fails
 * @throws COSVisitorException declared by the original signature
 */
private void transformPage(PDDocument document, PDPage page, AffineTransform at)
        throws IOException, COSVisitorException {
    PDRectangle cropBox = page.findCropBox();
    float xOffset = (cropBox.getUpperRightX() + cropBox.getLowerLeftX()) / 2f;
    float yOffset = (cropBox.getUpperRightY() + cropBox.getLowerLeftY()) / 2f;

    // translate centre to origin -> apply 'at' -> translate back
    AffineTransform transform = AffineTransform.getTranslateInstance(xOffset, yOffset);
    transform.concatenate(at);
    transform.concatenate(AffineTransform.getTranslateInstance(-xOffset, -yOffset));

    // NOTE(review): the flags (true, false) are presumably "append" and
    // "do not compress" - confirm against the PDFBox version in use.
    PDPageContentStream stream = new PDPageContentStream(document, page, true, false);
    try {
        stream.concatenate2CTM(transform);
    } finally {
        // Close even when writing the matrix fails, so the stream is not leaked.
        stream.close();
    }

    COSBase contents = page.getCOSDictionary().getDictionaryObject(COSName.CONTENTS);
    if (contents instanceof COSStreamArray) {
        COSStreamArray contentsArray = (COSStreamArray) contents;
        int lastIndex = contentsArray.getStreamCount() - 1;

        // Rotate the array by one: the last stream goes first, the rest follow in order.
        COSArray newArray = new COSArray();
        newArray.add(contentsArray.get(lastIndex));

        for (int i = 0; i < lastIndex; i++) {
            newArray.add(contentsArray.get(i));
        }

        COSStreamArray newStreamArray = new COSStreamArray(newArray);
        page.getCOSDictionary().setItem(COSName.CONTENTS, newStreamArray);
    }
}

From source file:so.rezervacija.StampajRezervaciju.java

/**
 * Writes the reservation confirmation to "PotvrdaRezervacije.pdf" as word-wrapped
 * text on a single page and, where the desktop supports it, tries to open the
 * file with the default application.
 * <p>
 * Failures are printed to standard output and not rethrown. The content stream
 * and the document are closed on every path.
 *
 * @throws Exception declared by the overridden operation
 */
@Override
protected void izvrsiKonkretnuOperaciju() throws Exception {
    PDDocument doc = null;
    PDPageContentStream contentStream = null;

    try {
        doc = new PDDocument();
        PDPage page = new PDPage();

        doc.addPage(page);
        PDFont pdfFont = PDType1Font.HELVETICA_BOLD;
        float fontSize = 25;
        float leading = 1.5f * fontSize;

        contentStream = new PDPageContentStream(doc, page);

        PDRectangle mediabox = page.findMediaBox();
        float margin = 72;
        float width = mediabox.getWidth() - 2 * margin;
        float startX = mediabox.getLowerLeftX() + margin;
        float startY = mediabox.getUpperRightY() - margin;

        // "yyyy" is the calendar year; "YYYY" would be the week-based year, which
        // differs from the calendar year for dates around New Year.
        String text = "Izvrsili ste rezervaciju za tretman " + r.getTretman() + ", vreme rezervacije:"
                + new SimpleDateFormat("yyyy-MM-dd HH:mm").format(r.getVreme())
                + " zaposleni koji ce vrsiti tretman:" + r.getZaposleni().getImePrezime();

        // Greedy word wrap: extend the candidate line space by space until it no
        // longer fits, then cut at the last space that still fitted.
        List<String> lines = new ArrayList<String>();
        int lastSpace = -1;
        while (text.length() > 0) {
            int spaceIndex = text.indexOf(' ', lastSpace + 1);
            if (spaceIndex < 0) {
                // no further space: the remainder becomes the final line
                lines.add(text);
                text = "";
            } else {
                String subString = text.substring(0, spaceIndex);
                // string width is reported in 1/1000 of the font size
                float size = fontSize * pdfFont.getStringWidth(subString) / 1000;
                if (size > width) {
                    if (lastSpace < 0) // So we have a word longer than the line... draw it anyways
                    {
                        lastSpace = spaceIndex;
                    }
                    subString = text.substring(0, lastSpace);
                    lines.add(subString);
                    text = text.substring(lastSpace).trim();
                    lastSpace = -1;
                } else {
                    lastSpace = spaceIndex;
                }
            }
        }

        contentStream.beginText();
        contentStream.setFont(pdfFont, fontSize);
        contentStream.moveTextPositionByAmount(startX, startY);
        for (String line : lines) {
            contentStream.drawString(line);
            contentStream.moveTextPositionByAmount(0, -leading);
        }
        contentStream.endText();
        contentStream.close();
        contentStream = null; // already closed; nothing left for the finally block

        doc.save("PotvrdaRezervacije.pdf");

        if (Desktop.isDesktopSupported()) {
            try {
                // NOTE(review): the file is saved relative to the working directory
                // but opened from "../ServerProjekat/"; confirm both resolve to the
                // same file in the deployed layout.
                File myFile = new File("../ServerProjekat/PotvrdaRezervacije.pdf");
                Desktop.getDesktop().open(myFile);
            } catch (IOException ex) {
                // no application registered for PDFs
            }
        }
    } catch (Exception e) {
        System.out.println(e);
    } finally {
        // Release both resources even when building or saving the PDF failed.
        if (contentStream != null) {
            try {
                contentStream.close();
            } catch (IOException closeError) {
                System.out.println(closeError);
            }
        }
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException closeError) {
                System.out.println(closeError);
            }
        }
    }
}