Example usage for org.apache.pdfbox.pdmodel.common PDRectangle getLowerLeftX

List of usage examples for org.apache.pdfbox.pdmodel.common PDRectangle getLowerLeftX

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.common.PDRectangle.getLowerLeftX().

Prototype

public float getLowerLeftX() 

Source Link

Document

This will get the lower left x coordinate.

Usage

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Appends the transformation for a rotated page to the given transform.
 * <p>
 * Rotations of 90, 180 and 270 degrees each add their own sequence of scale, translate and
 * rotate operations; any other value leaves the transform untouched.
 *
 * @param rotation the rotation in degrees
 * @param viewBox the box supplying the width, height and lower-left corner used below
 * @param atdoc the transform that is modified in place
 */
private void rotate(int rotation, PDRectangle viewBox, AffineTransform atdoc) {
    final float width = viewBox.getWidth();
    final float height = viewBox.getHeight();
    final float lowerLeftX = viewBox.getLowerLeftX();
    final float lowerLeftY = viewBox.getLowerLeftY();
    // box size added to the lower-left corner
    final float x = width + lowerLeftX;
    final float y = height + lowerLeftY;

    if (rotation == 90) {
        final float widthOverHeight = width / height;
        final float heightOverWidth = height / width;
        atdoc.scale(widthOverHeight, heightOverWidth);
        atdoc.translate(0, width);
        atdoc.rotate(-Math.PI / 2.0);
        // the same scale is applied a second time after the rotation
        atdoc.scale(widthOverHeight, heightOverWidth);
    } else if (rotation == 180) {
        atdoc.translate(x, y);
        atdoc.rotate(-Math.PI);
        atdoc.translate(-lowerLeftX, -lowerLeftY);
    } else if (rotation == 270) {
        atdoc.translate(lowerLeftX, y);
        // 270 + 180 = 450 degrees
        atdoc.rotate(Math.toRadians(270 + 180));
        atdoc.translate(-x, -y);
    }
    // any other rotation: no additional transformations necessary
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Shifts the rectangle of every annotation in the list by the translation of the given
 * transform, measured relative to the lower-left corner of the page's view box.
 * <p>
 * The view box is the page's crop box, or its media box when there is no crop box.
 * Annotations without a rectangle are left unchanged.
 *
 * @param page the page the annotations belong to
 * @param pageAnnotations the annotations to move; every element is cast to PDAnnotation
 * @param at the transform whose translation components supply the offset
 */
private void moveAnnotations(PDPage page, List pageAnnotations, AffineTransform at) {
    final PDRectangle mediaBox = page.getMediaBox();
    final PDRectangle cropBox = page.getCropBox();
    final PDRectangle viewBox;
    if (cropBox == null) {
        viewBox = mediaBox;
    } else {
        viewBox = cropBox;
    }

    for (Object element : pageAnnotations) {
        final PDAnnotation annotation = (PDAnnotation) element;
        final PDRectangle box = annotation.getRectangle();
        final float dx = (float) (at.getTranslateX() - viewBox.getLowerLeftX());
        final float dy = (float) (at.getTranslateY() - viewBox.getLowerLeftY());
        if (box == null) {
            continue;
        }
        box.setUpperRightX(box.getUpperRightX() + dx);
        box.setLowerLeftX(box.getLowerLeftX() + dx);
        box.setUpperRightY(box.getUpperRightY() + dy);
        box.setLowerLeftY(box.getLowerLeftY() + dy);
        annotation.setRectangle(box);
        // Only the rectangle is moved; the annotation's "Vertices" entry is not adjusted
        // (code that shifted it had been disabled).
    }
}

From source file:org.apache.pdflens.views.pagesview.PageDrawer.java

License:Apache License

/**
 * This will draw the page to the requested context.
 *
 * @param g The graphics context to draw onto.
 * @param p The page to draw./*from   w w  w  . j  ava  2s. c  om*/
 * @param pageDimension The size of the page to draw.
 *
 * @throws IOException If there is an IO error while drawing the page.
 */
public void drawPage(Graphics g, PDPage p, Dimension pageDimension) throws IOException {
    graphics = (Graphics2D) g;
    page = p;
    pageSize = pageDimension;
    graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
    graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
    // Only if there is some content, we have to process it. 
    // Otherwise we are done here and we will produce an empty page
    if (page.getContents() != null) {
        PDResources resources = page.findResources();
        processStream(page, resources, page.getContents().getStream());
    }
    List annotations = page.getAnnotations();
    for (int i = 0; i < annotations.size(); i++) {
        PDAnnotation annot = (PDAnnotation) annotations.get(i);
        PDRectangle rect = annot.getRectangle();
        String appearanceName = annot.getAppearanceStream();
        PDAppearanceDictionary appearDictionary = annot.getAppearance();
        if (appearDictionary != null) {
            if (appearanceName == null) {
                appearanceName = "default";
            }
            Map appearanceMap = appearDictionary.getNormalAppearance();
            PDAppearanceStream appearance = (PDAppearanceStream) appearanceMap.get(appearanceName);
            if (appearance != null) {
                g.translate((int) rect.getLowerLeftX(), (int) -rect.getLowerLeftY());
                processSubStream(page, appearance.getResources(), appearance.getStream());
                g.translate((int) -rect.getLowerLeftX(), (int) +rect.getLowerLeftY());
            }
        }
    }

}

From source file:org.data2semantics.annotate.D2S_SampleAnnotation.java

License:Apache License

/**
 * This will create a document showing various annotations.
 * <p>
 * A single page is created with two lines of text, then a highlight, a URI link, a circle,
 * a square and a line annotation are added to it. The result is saved as
 * "testAnnotation.pdf" and the document is closed in all cases.
 *
 * @param args
 *            The command line arguments (not used).
 * 
 * @throws Exception
 *             If creating or saving the document fails.
 */
public static void main(String[] args) throws Exception {

    PDDocument document = new PDDocument();

    try {
        PDPage page = new PDPage();
        document.addPage(page);
        List annotations = page.getAnnotations();

        // Setup some basic reusable objects/constants
        // Annotations themselves can only be used once!

        // All measurements below are expressed in multiples of this value (72 units).
        float inch = 72;
        PDGamma colourRed = new PDGamma();
        colourRed.setR(1);
        PDGamma colourBlue = new PDGamma();
        colourBlue.setB(1);
        // No component is set on this one; it is used as the "black" colour below.
        PDGamma colourBlack = new PDGamma();

        PDBorderStyleDictionary borderThick = new PDBorderStyleDictionary();
        borderThick.setWidth(inch / 12); // 12th inch
        PDBorderStyleDictionary borderThin = new PDBorderStyleDictionary();
        borderThin.setWidth(inch / 72); // 1 point
        PDBorderStyleDictionary borderULine = new PDBorderStyleDictionary();
        borderULine.setStyle(PDBorderStyleDictionary.STYLE_UNDERLINE);
        borderULine.setWidth(inch / 72); // 1 point

        // Upper-right corner of the media box; used as page width and height below.
        float pw = page.getMediaBox().getUpperRightX();
        float ph = page.getMediaBox().getUpperRightY();

        // First add some text, two lines; we'll add some annotations to this later

        PDFont font = PDType1Font.HELVETICA_BOLD;

        PDPageContentStream contentStream = new PDPageContentStream(document, page);
        contentStream.beginText();
        contentStream.setFont(font, 18);
        contentStream.moveTextPositionByAmount(inch, ph - inch - 18);
        contentStream.drawString("PDFBox");
        contentStream.moveTextPositionByAmount(0, -(inch / 2));
        contentStream.drawString("Click Here");
        contentStream.endText();

        contentStream.close();

        // Now add the markup annotation, a highlight to PDFBox text
        PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
        txtMark.setColour(colourBlue);
        txtMark.setConstantOpacity((float) 0.2); // Opacity of 0.2, so the highlight is mostly transparent

        // Set the rectangle containing the markup

        // String width divided by 1000 and scaled by the font size of 18 used above
        float textWidth = (font.getStringWidth("PDFBox") / 1000) * 18;
        PDRectangle position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - inch - 18);
        position.setUpperRightX(72 + textWidth); // 72 is the same value as inch
        position.setUpperRightY(ph - inch);
        txtMark.setRectangle(position);

        // work out the points forming the four corners of the annotations
        // set out in anti clockwise form (Completely wraps the text)
        // OK, the below doesn't match that description.
        // It's what acrobat 7 does and displays properly!
        float[] quads = new float[8];

        quads[0] = position.getLowerLeftX(); // x1
        quads[1] = position.getUpperRightY() - 2; // y1
        quads[2] = position.getUpperRightX(); // x2
        quads[3] = quads[1]; // y2
        quads[4] = quads[0]; // x3
        quads[5] = position.getLowerLeftY() - 2; // y3
        quads[6] = quads[2]; // x4
        quads[7] = quads[5]; // y4

        txtMark.setQuadPoints(quads);
        txtMark.setContents("Highlighted since it's important");

        annotations.add(txtMark);

        // Now add the link annotation, so the clickme works
        PDAnnotationLink txtLink = new PDAnnotationLink();
        txtLink.setBorderStyle(borderULine);

        // Set the rectangle containing the link

        textWidth = (font.getStringWidth("Click Here") / 1000) * 18;
        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (float) (1.5 * inch) - 20); // down a couple of points
        position.setUpperRightX(72 + textWidth); // 72 is the same value as inch
        position.setUpperRightY(ph - (float) (1.5 * inch));
        txtLink.setRectangle(position);

        // add an action
        PDActionURI action = new PDActionURI();
        action.setURI("http://www.pdfbox.org");
        txtLink.setAction(action);

        annotations.add(txtLink);

        // Now draw a few more annotations

        PDAnnotationSquareCircle aCircle = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_CIRCLE);
        aCircle.setContents("Circle Annotation");
        aCircle.setInteriorColour(colourRed); // Fill in circle in red
        aCircle.setColour(colourBlue); // The border itself will be blue
        aCircle.setBorderStyle(borderThin);

        // Place the annotation on the page, we'll make this 1" round
        // 3" down, 1" in on the page

        position = new PDRectangle();
        position.setLowerLeftX(inch);
        position.setLowerLeftY(ph - (3 * inch) - inch); // 1" height, 3" down
        position.setUpperRightX(2 * inch); // 1" in, 1" width
        position.setUpperRightY(ph - (3 * inch)); // 3" down
        aCircle.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aCircle);

        // Now a square annotation

        PDAnnotationSquareCircle aSquare = new PDAnnotationSquareCircle(
                PDAnnotationSquareCircle.SUB_TYPE_SQUARE);
        aSquare.setContents("Square Annotation");
        aSquare.setColour(colourRed); // Outline in red, not setting a fill
        aSquare.setBorderStyle(borderThick);

        // Place the annotation on the page, we'll make this 1" (72points)
        // square
        // 3.5" down, 1" in from the right on the page

        position = new PDRectangle(); // Reuse the variable, but note it's a new object!
        position.setLowerLeftX(pw - (2 * inch)); // 1" in from right, 1" wide
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5" down
        position.setUpperRightX(pw - inch); // 1" in from right
        position.setUpperRightY(ph - (float) (3.5 * inch)); // 3.5" down
        aSquare.setRectangle(position);

        // add to the annotations on the page
        annotations.add(aSquare);

        // Now we want to draw a line between the two, one end with an open
        // arrow

        PDAnnotationLine aLine = new PDAnnotationLine();

        aLine.setEndPointEndingStyle(PDAnnotationLine.LE_OPEN_ARROW);
        aLine.setContents("Circle->Square");
        aLine.setCaption(true); // Make the contents a caption on the line

        // Set the rectangle containing the line

        position = new PDRectangle(); // Reuse the variable, but note it's a new object!
        position.setLowerLeftX(2 * inch); // 1" in + width of circle
        position.setLowerLeftY(ph - (float) (3.5 * inch) - inch); // 1" height, 3.5" down
        position.setUpperRightX(pw - inch - inch); // 1" in from right, and width of square
        position.setUpperRightY(ph - (3 * inch)); // 3" down (top of circle)
        aLine.setRectangle(position);

        // Now set the line position itself
        float[] linepos = new float[4];
        linepos[0] = 2 * inch; // x1 = rhs of circle
        linepos[1] = ph - (float) (3.5 * inch); // y1 halfway down circle
        linepos[2] = pw - (2 * inch); // x2 = lhs of square
        linepos[3] = ph - (4 * inch); // y2 halfway down square
        aLine.setLine(linepos);

        aLine.setBorderStyle(borderThick);
        aLine.setColour(colourBlack);

        // add to the annotations on the page
        annotations.add(aLine);

        // Finally all done

        document.save("testAnnotation.pdf");
    } finally {
        // Close the document whether or not building or saving it succeeded.
        document.close();
    }
}

From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java

License:Apache License

/**
 * This will process the contents of a page.
 * <p>
 * Pages whose number lies outside {@code startPage}..{@code endPage} (inclusive) are ignored.
 * For a page inside the range the per-page state is reset, the content stream is processed,
 * the collected characters are filtered and converted, and the resulting page is added to
 * {@code docContent}.
 * <p>
 * The page number is placed in the logging context under the key "page" for the duration of
 * the processing and is removed again even when processing fails.
 *
 * @param page    The page to process.
 * @param content The contents of the page.
 * @throws IOException If there is an error processing the page.
 */
protected void processPage(@NotNull PDPage page, COSStream content) throws IOException {

    if ((currentPageNo >= startPage) && (currentPageNo <= endPage)) {

        /* show which page we are working on in the log */
        MDC.put("page", currentPageNo);
        try {
            charactersForPage.clear();
            characterListMapping.clear();
            pageSize = page.findCropBox().createDimension();
            rotation = (float) page.findRotation();

            /* this is used to 'draw' images on during pdf parsing */
            graphicsDrawer.clearSurface();
            setGraphicsState(null);
            resetEngine();
            processStream(page, page.findResources(), content);
            filterOutBadFonts(charactersForPage);

            /* filter out remaining definite bad characters */
            filterOutControlCodes(charactersForPage);

            List<PhysicalText> texts = new ArrayList<PhysicalText>(charactersForPage.size());

            for (ETextPosition tp : charactersForPage) {
                texts.add(tp.convertText(fonts));
            }

            /* the page dimensions are taken from the media box, not the crop box */
            final PDRectangle mediaBox = page.findMediaBox();
            Rectangle dimensions = new Rectangle(mediaBox.getLowerLeftX(), mediaBox.getLowerLeftY(),
                    mediaBox.getWidth(), mediaBox.getHeight());
            PageContent thisPage = new PageContent(texts, graphicsDrawer.getGraphicContents(), currentPageNo,
                    dimensions);

            docContent.addPage(thisPage);
        } finally {
            /* always clear the entry so a failure cannot leave a stale page number in the log context */
            MDC.remove("page");
        }
    }
}

From source file:org.fit.pdfdom.PDFBoxTree.java

License:Open Source License

/**
 * Builds the transformation for the current page from its crop box and rotation.
 * <p>
 * For rotations of 90, 180 and 270 degrees a translation by the crop box size is added first.
 * The transform is then rotated by the page rotation, translated by the crop box height,
 * flipped along the y axis and finally shifted by the negated lower-left corner of the
 * crop box.
 *
 * @return a newly created transform
 */
protected AffineTransform createCurrentPageTransformation() {
    final PDRectangle cropBox = pdpage.getCropBox();
    final int rotation = pdpage.getRotation();
    final AffineTransform transform = new AffineTransform();

    // rotation-specific offset; other rotation values add none
    if (rotation == 90) {
        transform.translate(cropBox.getHeight(), 0);
    } else if (rotation == 180) {
        transform.translate(cropBox.getWidth(), cropBox.getHeight());
    } else if (rotation == 270) {
        transform.translate(0, cropBox.getWidth());
    }

    transform.rotate(Math.toRadians(rotation));
    transform.translate(0, cropBox.getHeight());
    // negative y scale mirrors the vertical axis
    transform.scale(1, -1);
    transform.translate(-cropBox.getLowerLeftX(), -cropBox.getLowerLeftY());

    return transform;
}

From source file:org.nuxeo.pdf.PDFLinks.java

License:Apache License

/**
 * Loads the PDF on first use and registers one text-extraction region per link annotation.
 * <p>
 * Nothing happens when {@code pdfDoc} is already set. Otherwise the document is loaded from
 * {@code pdfBlob}, a new {@code stripper} is created and, for every link annotation on every
 * page, a region named after the annotation's index within its page is added to the stripper.
 *
 * @throws NuxeoException if an IOException occurs while preparing the regions
 */
protected void loadAndPreflightPdf() throws NuxeoException {

    if (pdfDoc != null) {
        return;
    }
    pdfDoc = PDFUtils.load(pdfBlob, password);

    @SuppressWarnings("unchecked")
    List<PDPage> allPages = pdfDoc.getDocumentCatalog().getAllPages();
    try {
        stripper = new PDFTextStripperByArea();
        for (PDPage page : allPages) {
            List<PDAnnotation> annotations = page.getAnnotations();
            for (int j = 0; j < annotations.size(); j++) {
                PDAnnotation annot = annotations.get(j);
                if (!(annot instanceof PDAnnotationLink)) {
                    continue;
                }
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDRectangle rect = link.getRectangle();
                // need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                // Only unrotated pages get their y coordinate flipped against the page
                // height; for every other rotation y is used as is.
                if (page.findRotation() == 0) {
                    PDRectangle pageSize = page.findMediaBox();
                    y = pageSize.getHeight() - y;
                }

                Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                // NOTE(review): the region name is only the index within the page, so equal
                // indices on different pages share a name on this single stripper - confirm
                // that this is what the code reading the regions expects.
                stripper.addRegion("" + j, awtRect);
            }
        }
    } catch (IOException e) {
        throw new NuxeoException("Cannot preflight and prepare regions", e);
    }
}

From source file:org.opencps.util.ExtractTextLocations.java

License:Open Source License

/**
 * Loads the PDF at the given path, runs the content stream of its first page through this
 * processor and records the first page's media box metrics.
 * <p>
 * An encrypted document is decrypted with a blank password first. Exceptions raised while
 * loading, decrypting or processing are logged and not rethrown; the document is closed in
 * every case.
 *
 * @param fullPath path of the PDF file to read
 * @throws IOException declared by the constructor; in the code below only closing the
 *         document happens outside the catch-all handler
 */
public ExtractTextLocations(String fullPath) throws IOException {

    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(new File(fullPath));

        if (pdf.isEncrypted()) {
            try {
                pdf.decrypt(StringPool.BLANK);
            } catch (Exception e) {
                // decryption failure is logged and processing carries on
                _log.error(e);
            }
        }

        final List pages = pdf.getDocumentCatalog().getAllPages();
        if (pages == null || pages.isEmpty()) {
            return;
        }

        // only the first page is looked at
        final PDPage firstPage = (PDPage) pages.get(0);
        if (firstPage.getContents() != null) {
            this.processStream(firstPage, firstPage.findResources(), firstPage.getContents().getStream());
        }

        final PDRectangle mediaBox = firstPage.findMediaBox();
        if (mediaBox == null) {
            return;
        }
        setPageWidth(mediaBox.getWidth());
        setPageHeight(mediaBox.getHeight());
        setPageLLX(mediaBox.getLowerLeftX());
        setPageURX(mediaBox.getUpperRightX());
        setPageLLY(mediaBox.getLowerLeftY());
        setPageURY(mediaBox.getUpperRightY());
    } catch (Exception e) {
        _log.error(e);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * A function to extract embedded URIs from the PDF-document.
 * <p>
 * For every page, a text region is registered for each link annotation, the text inside
 * those regions is extracted, and every link whose action is a URI action is added to the
 * parser document as a reference together with the text found in its region.
 * <p>
 * Pages without annotations are skipped; they do not stop the processing of later pages.
 *
 * @param parserDoc the parser document that receives the references
 * @param pddDoc the PDF document to scan
 * @throws IOException if the text stripper cannot be created or region extraction fails
 */
protected void extractURLs(IParserDocument parserDoc, PDDocument pddDoc) throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null) {
        return;
    }

    @SuppressWarnings("unchecked")
    final List<PDPage> allPages = pddDocCatalog.getAllPages();
    if (allPages == null || allPages.isEmpty()) {
        return;
    }

    for (final PDPage page : allPages) {
        @SuppressWarnings("unchecked")
        final List<PDAnnotation> annotations = page.getAnnotations();
        if (annotations == null || annotations.isEmpty()) {
            // nothing to extract on this page, but later pages may still carry links
            continue;
        }

        // a fresh stripper per page, so region names (annotation indices) cannot clash
        final PDFTextStripperByArea stripper = new PDFTextStripperByArea();

        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = annotations.get(j);
            if (!(annot instanceof PDAnnotationLink)) {
                continue;
            }
            final PDAnnotationLink link = (PDAnnotationLink) annot;
            final PDRectangle rect = link.getRectangle();

            //need to reposition link rectangle to match text space
            final float x = rect.getLowerLeftX();
            float y = rect.getUpperRightY();
            final float width = rect.getWidth();
            final float height = rect.getHeight();
            // only unrotated pages get their y coordinate flipped against the page height;
            // for every other rotation y is used as is
            if (page.findRotation() == 0) {
                final PDRectangle pageSize = page.findMediaBox();
                y = pageSize.getHeight() - y;
            }

            final Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
            stripper.addRegion("" + j, awtRect);
        }

        stripper.extractRegions(page);

        //then pair every URI action with the text found in its region
        for (int j = 0; j < annotations.size(); j++) {
            final PDAnnotation annot = annotations.get(j);
            if (!(annot instanceof PDAnnotationLink)) {
                continue;
            }
            final PDAnnotationLink link = (PDAnnotationLink) annot;
            final PDAction action = link.getAction();
            final String urlText = stripper.getTextForRegion("" + j);

            if (action instanceof PDActionURI) {
                final PDActionURI embeddedUri = (PDActionURI) action;
                final URI temp = URI.create(embeddedUri.getURI());

                parserDoc.addReference(temp, urlText, Constants.SERVICE_PID + ":" + PID);
            }
        }
    }
}

From source file:org.polarsys.kitalpha.doc.doc2model.tikaparsing.pdf.Doc2ModelTextStripper.java

License:Apache License

/**
 * This will process a TextPosition object and add the
 * text to the list of characters on a page.  It takes care of
 * overlapping text.
 * <p>
 * The raw position is first passed through {@code processStyle} and the result is cast to
 * StylizedTextPosition. When duplicate suppression is enabled, the text is dropped if an
 * already stored position lies within tolerance of it. A text that is kept is assigned to
 * an article division and appended to that division's list, merging diacritics with the
 * immediately preceding entry where they overlap.
 *
 * @param rawtext The text to process.
 */
protected void processTextPosition(TextPosition rawtext) {

    StylizedTextPosition text = (StylizedTextPosition) processStyle(rawtext);

    boolean showCharacter = true;
    if (suppressDuplicateOverlappingText) {
        showCharacter = false;
        String textCharacter = text.getCharacter();
        float textX = text.getX();
        float textY = text.getY();
        // previously seen positions are looked up by their character string
        List<TextPosition> sameTextCharacters = (List<TextPosition>) characterListMapping.get(textCharacter);
        if (sameTextCharacters == null) {
            sameTextCharacters = new ArrayList<TextPosition>();
            characterListMapping.put(textCharacter, sameTextCharacters);
        }

        // RDD - Here we compute the value that represents the end of the rendered
        // text.  This value is used to determine whether subsequent text rendered
        // on the same line overwrites the current text.
        //
        // We subtract any positive padding to handle cases where extreme amounts
        // of padding are applied, then backed off (not sure why this is done, but there
        // are cases where the padding is on the order of 10x the character width, and
        // the TJ just backs up to compensate after each character).  Also, we subtract
        // an amount to allow for kerning (a percentage of the width of the last
        // character).
        //
        // NOTE(review): the code below only derives a tolerance of one third of the average
        // width per character; the padding handling described above is not visible here.
        boolean suppressCharacter = false;
        float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f;
        // NOTE(review): the textCharacter != null test in the loop condition comes after
        // textCharacter.length() has already been called above.
        for (int i = 0; i < sameTextCharacters.size() && textCharacter != null; i++) {
            TextPosition character = sameTextCharacters.get(i);
            String charCharacter = character.getCharacter();
            float charX = character.getX();
            float charY = character.getY();
            //only want to suppress when a stored position lies within tolerance in both x and y

            if (charCharacter != null &&
            //charCharacter.equals( textCharacter ) &&
                    within(charX, textX, tolerance) && within(charY, textY, tolerance)) {
                suppressCharacter = true;
            }
        }
        if (!suppressCharacter) {
            sameTextCharacters.add(text);
            showCharacter = true;
        }
    }

    if (showCharacter) {
        //if we are showing the character then we need to determine which
        //article it belongs to.
        int foundArticleDivisionIndex = -1;
        int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
        int notFoundButFirstLeftArticleDivisionIndex = -1;
        int notFoundButFirstAboveArticleDivisionIndex = -1;
        float x = text.getX();
        float y = text.getY();
        if (shouldSeparateByBeads) {
            // stop at the first bead whose rectangle contains the position (or the first null bead)
            for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) {
                PDThreadBead bead = (PDThreadBead) pageArticles.get(i);
                if (bead != null) {
                    PDRectangle rect = bead.getRectangle();
                    if (rect.contains(x, y)) {
                        // odd index for a position inside bead i; the fallbacks below use the even index i * 2
                        foundArticleDivisionIndex = i * 2 + 1;
                    // NOTE(review): the variable name says "left AND above" but the test uses ||,
                    // so the two else-if branches that follow can never be reached - confirm intent.
                    } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
                            && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) {
                        notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2;
                    } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) {
                        notFoundButFirstLeftArticleDivisionIndex = i * 2;
                    } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) {
                        notFoundButFirstAboveArticleDivisionIndex = i * 2;
                    }
                } else {
                    foundArticleDivisionIndex = 0;
                }
            }
        } else {
            foundArticleDivisionIndex = 0;
        }
        // pick the division by priority: containing bead, then the fallbacks, then the last division
        int articleDivisionIndex = -1;
        if (foundArticleDivisionIndex != -1) {
            articleDivisionIndex = foundArticleDivisionIndex;
        } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
        } else if (notFoundButFirstLeftArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
        } else if (notFoundButFirstAboveArticleDivisionIndex != -1) {
            articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
        } else {
            articleDivisionIndex = charactersByArticle.size() - 1;
        }

        List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex);

        /* In the wild, some PDF encoded documents put diacritics (accents on
         * top of characters) into a separate Tj element.  When displaying them
         * graphically, the two chunks get overlayed.  With text output though,
         * we need to do the overlay. This code recombines the diacritic with
         * its associated character if the two are consecutive.
         */
        if (textList.isEmpty()) {
            textList.add(text);
        } else {
            /* test if we overlap the previous entry.  
             * Note that we are making an assumption that we need to only look back
             * one TextPosition to find what we are overlapping.  
             * This may not always be true. */
            TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1);
            if (text.isDiacritic() && previousTextPosition.contains(text)) {
                previousTextPosition.mergeDiacritic(text, normalize);
            }
            /* If the previous TextPosition was the diacritic, merge it into this
             * one and remove it from the list. */
            else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) {
                text.mergeDiacritic(previousTextPosition, normalize);
                textList.remove(textList.size() - 1);
                textList.add(text);
            } else {
                textList.add(text);
            }
        }
    }
}