Example usage of the org.apache.pdfbox.text.TextPosition constructor

List of usage examples for the org.apache.pdfbox.text.TextPosition constructor

Introduction

On this page you can find example usage of the org.apache.pdfbox.text.TextPosition constructor.

Prototype

public TextPosition(int pageRotation, float pageWidth, float pageHeight, Matrix textMatrix, float endX,
        float endY, float maxHeight, float individualWidth, float spaceWidth, String unicode, int[] charCodes,
        PDFont font, float fontSize, int fontSizeInPt) 

Source Link

Document

Constructor.

Usage

From source file: com.repeatability.pdf.PDFTextStreamEngine.java

License:Apache License

/**
 * Turns a single glyph into a {@link TextPosition} measured in display units and passes it to
 * {@code processTextPosition}.
 *
 * <p>This method was originally written by Ben Litchfield for PDFStreamEngine.
 *
 * <p>Note on units: character sizes are given in glyph units, text locations start out in text
 * units, and the values handed to {@code TextPosition} are in display units.
 *
 * @param textRenderingMatrix rendering matrix of the glyph; its translation and X/Y scaling
 *        factors are read to derive the display-space width, height and space width
 * @param font font the glyph belongs to
 * @param code character code of the glyph
 * @param unicode not read; the text is looked up again via
 *        {@code font.toUnicode(code, glyphList)}
 * @param displacement displacement of the glyph; its X and Y components are read
 * @throws IOException propagated from the font and text-position calls made here
 */
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement)
        throws IOException {
    //
    // legacy calculations which were previously in PDFStreamEngine
    //
    PDGraphicsState graphicsState = getGraphicsState();
    Matrix ctm = graphicsState.getCurrentTransformationMatrix();
    float fontSize = graphicsState.getTextState().getFontSize();
    float horizontalScaling = graphicsState.getTextState().getHorizontalScaling() / 100f;
    Matrix textMatrix = getTextMatrix();

    float heightText = glyphHeightInTextSpace(font);
    float advanceX = horizontalAdvance(font, code, displacement);

    // (modified) combined displacement matrix; character spacing and word spacing are
    // deliberately left out, due to legacy code in TextStripper
    Matrix shift = Matrix.getTranslateInstance(advanceX * fontSize * horizontalScaling,
            displacement.getY() * fontSize);

    // (modified) text rendering matrix: text space -> device space
    Matrix nextTextRenderingMatrix = shift.multiply(textMatrix).multiply(ctm);
    float nextX = nextTextRenderingMatrix.getTranslateX();
    float nextY = nextTextRenderingMatrix.getTranslateY();

    // (modified) width and height; the width is taken before any crop box shift of nextX
    float widthDisplay = nextX - textRenderingMatrix.getTranslateX();
    float heightDisplay = heightText * textRenderingMatrix.getScalingFactorY();

    //
    // start of the original method
    //

    // the space width has to be transformed into display units
    float spaceWidthDisplay = spaceWidthInTextSpace(font) * textRenderingMatrix.getScalingFactorX();

    String text = mapToUnicode(font, code);
    if (text == null) {
        // composite font without a Unicode mapping: the glyph is skipped
        return;
    }

    // adjust for cropbox if needed
    Matrix positionMatrix = textRenderingMatrix;
    if (translateMatrix != null) {
        positionMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
        nextX -= pageSize.getLowerLeftX();
        nextY -= pageSize.getLowerLeftY();
    }

    TextPosition position = new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(),
            positionMatrix, nextX, nextY, heightDisplay, widthDisplay, spaceWidthDisplay, text,
            new int[] { code }, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX()));
    processTextPosition(position);
}

/**
 * Derives the glyph height in text space from the font bounding box, preferring a smaller
 * non-zero CapHeight when the font descriptor has one.
 */
private float glyphHeightInTextSpace(PDFont font) throws IOException {
    BoundingBox fontBox = font.getBoundingBox();
    if (fontBox.getLowerLeftY() < Short.MIN_VALUE) {
        // PDFBOX-2158 and PDFBOX-3130
        // files by Salmat eSolutions / ClibPDF Library
        fontBox.setLowerLeftY(-(fontBox.getLowerLeftY() + 65536));
    }

    // 1/2 the bbox is used as the height todo: why?
    float glyphHeight = fontBox.getHeight() / 2;

    // sometimes the bbox has very high values, but CapHeight is OK
    PDFontDescriptor descriptor = font.getFontDescriptor();
    if (descriptor != null) {
        float capHeight = descriptor.getCapHeight();
        boolean capHeightIsSmaller = capHeight != 0 && capHeight < glyphHeight;
        if (capHeightIsSmaller) {
            glyphHeight = capHeight;
        }
    }

    // glyph space -> text space
    if (font instanceof PDType3Font) {
        return font.getFontMatrix().transformPoint(0, glyphHeight).y;
    }
    return glyphHeight / 1000;
}

/**
 * Returns the horizontal displacement of the glyph; for vertical fonts the value is rebuilt
 * from the glyph width because the sorting algorithm is based on the character width.
 */
private float horizontalAdvance(PDFont font, int code, Vector displacement) throws IOException {
    float advance = displacement.getX();
    if (!font.isVertical()) {
        return advance;
    }

    // the displacement of vertical characters offers no suitable width, so compute our own
    advance = font.getWidth(code) / 1000;

    // there may be an additional scaling factor for true type fonts
    TrueTypeFont ttf = null;
    if (font instanceof PDTrueTypeFont) {
        ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
    } else if (font instanceof PDType0Font) {
        PDCIDFont descendant = ((PDType0Font) font).getDescendantFont();
        if (descendant instanceof PDCIDFontType2) {
            ttf = ((PDCIDFontType2) descendant).getTrueTypeFont();
        }
    }
    if (ttf != null && ttf.getUnitsPerEm() != 1000) {
        advance *= 1000f / ttf.getUnitsPerEm();
    }
    return advance;
}

/**
 * Returns the width of a space in text units, falling back to 80% of the average font width
 * and finally to a generic value of 1.
 */
private float spaceWidthInTextSpace(PDFont font) throws IOException {
    float glyphSpaceToTextSpaceFactor = font instanceof PDType3Font
            ? font.getFontMatrix().getScaleX()
            : 1 / 1000f;

    float spaceWidthText = 0;
    try {
        // to avoid crash as described in PDFBOX-614, see what the space displacement should be
        spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
    } catch (Throwable exception) {
        LOG.warn(exception, exception);
    }

    if (spaceWidthText == 0) {
        spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
        // the average space width appears to be higher than necessary so make it smaller
        spaceWidthText *= .80f;
    }
    if (spaceWidthText == 0) {
        // if could not find font, use a generic value
        spaceWidthText = 1.0f;
    }
    return spaceWidthText;
}

/**
 * Maps a character code to text using the additional glyph list; returns {@code null} when a
 * non-simple font has no mapping, which tells the caller to skip the glyph.
 */
private String mapToUnicode(PDFont font, int code) throws IOException {
    // use our additional glyph list for Unicode mapping
    String mapped = font.toUnicode(code, glyphList);
    if (mapped != null) {
        return mapped;
    }
    // when there is no Unicode mapping available, Acrobat simply coerces the character code
    // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
    // this, which is why we leave it until this point in PDFTextStreamEngine.
    if (font instanceof PDSimpleFont) {
        return String.valueOf((char) code);
    }
    // Acrobat doesn't seem to coerce composite font's character codes, instead it
    // skips them. See the "allah2.pdf" TestTextStripper file.
    return null;
}