Example usage for org.apache.pdfbox.pdmodel.font PDFont getAverageFontWidth

List of usage examples for org.apache.pdfbox.pdmodel.font PDFont getAverageFontWidth

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.font PDFont getAverageFontWidth.

Prototype


@Override
public float getAverageFontWidth() 

Source Link

Document

This will get the average font width for all characters.

Usage

From source file:com.repeatability.pdf.PDFTextStreamEngine.java

License:Apache License

/**
 * Builds a {@link TextPosition} for one glyph and passes it to {@code processTextPosition}.
 *
 * <p>This method was originally written by Ben Litchfield for PDFStreamEngine. The first half
 * reproduces the legacy size/advance calculations that used to live in PDFStreamEngine; the
 * second half derives the space width, resolves the Unicode text and emits the position.
 *
 * @param textRenderingMatrix the text rendering matrix in effect for this glyph
 * @param font                the font the glyph is drawn with
 * @param code                the character code of the glyph
 * @param unicode             not read; the text is always re-resolved through
 *                            {@code font.toUnicode(code, glyphList)}
 * @param displacement        the glyph displacement; its X component is replaced by a
 *                            width-based value for vertical fonts
 * @throws IOException propagated from the font and matrix calls made here
 */
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement)
        throws IOException {
    // ---------------------------------------------------------------
    // legacy calculations which were previously in PDFStreamEngine
    // ---------------------------------------------------------------

    final PDGraphicsState graphicsState = getGraphicsState();
    final Matrix currentTransform = graphicsState.getCurrentTransformationMatrix();
    final float fontSize = graphicsState.getTextState().getFontSize();
    final float horizontalScaling = graphicsState.getTextState().getHorizontalScaling() / 100f;
    final Matrix textMatrix = getTextMatrix();

    final BoundingBox fontBox = font.getBoundingBox();
    if (fontBox.getLowerLeftY() < Short.MIN_VALUE) {
        // PDFBOX-2158 and PDFBOX-3130
        // files by Salmat eSolutions / ClibPDF Library
        fontBox.setLowerLeftY(-(fontBox.getLowerLeftY() + 65536));
    }

    // half of the bounding box height serves as the glyph height (todo: why?)
    float glyphHeight = fontBox.getHeight() / 2;

    // the bbox sometimes carries very large values while CapHeight is still sane,
    // so a non-zero CapHeight that is smaller wins
    final PDFontDescriptor descriptor = font.getFontDescriptor();
    if (descriptor != null) {
        final float capHeight = descriptor.getCapHeight();
        final boolean capHeightIsSmaller = capHeight != 0 && capHeight < glyphHeight;
        if (capHeightIsSmaller) {
            glyphHeight = capHeight;
        }
    }

    // glyph space -> text space
    final float height = (font instanceof PDType3Font)
            ? font.getFontMatrix().transformPoint(0, glyphHeight).y
            : glyphHeight / 1000;

    // The sorting algorithm relies on the character width. The displacement of a vertical
    // character offers no usable value for that, so one is computed from the glyph width.
    float displacementX = displacement.getX();
    if (font.isVertical()) {
        displacementX = font.getWidth(code) / 1000;

        // TrueType-based fonts may need an extra scaling factor
        TrueTypeFont trueType = null;
        if (font instanceof PDTrueTypeFont) {
            trueType = ((PDTrueTypeFont) font).getTrueTypeFont();
        } else if (font instanceof PDType0Font) {
            final PDCIDFont descendant = ((PDType0Font) font).getDescendantFont();
            if (descendant instanceof PDCIDFontType2) {
                trueType = ((PDCIDFontType2) descendant).getTrueTypeFont();
            }
        }
        if (trueType != null && trueType.getUnitsPerEm() != 1000) {
            displacementX *= 1000f / trueType.getUnitsPerEm();
        }
    }

    // (modified) combined displacement: deliberately computed *without* character spacing
    // and word spacing, due to legacy code in TextStripper
    final float tx = displacementX * fontSize * horizontalScaling;
    final float ty = displacement.getY() * fontSize;

    // (modified) text rendering matrix after the glyph: text space -> device space
    final Matrix nextTextRenderingMatrix = Matrix.getTranslateInstance(tx, ty)
            .multiply(textMatrix)
            .multiply(currentTransform);
    float nextX = nextTextRenderingMatrix.getTranslateX();
    float nextY = nextTextRenderingMatrix.getTranslateY();

    // (modified) width and height in display units
    final float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
    final float dyDisplay = height * textRenderingMatrix.getScalingFactorY();

    // ---------------------------------------------------------------
    // start of the original method
    // ---------------------------------------------------------------

    // Three different units appear in this code: character sizes come in glyph units, text
    // locations start out in text units, and the stored data is in display units. Variable
    // names end in Text or Display/Disp accordingly (no glyph-unit values are stored).

    final float glyphSpaceToTextSpaceFactor = (font instanceof PDType3Font)
            ? font.getFontMatrix().getScaleX()
            : 1 / 1000f;

    float spaceWidthText = 0;
    try {
        // guards against the crash described in PDFBOX-614 while looking up the space width
        spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
    } catch (Throwable failure) {
        LOG.warn(failure, failure);
    }

    if (spaceWidthText == 0) {
        // fall back to the average glyph width, shrunk because it tends to be too wide
        spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
        spaceWidthText *= .80f;
        if (spaceWidthText == 0) {
            // nothing usable from the font at all: use a generic value
            spaceWidthText = 1.0f;
        }
    }

    // the space width is stored in display units
    final float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX();

    // resolve the text through our additional glyph list
    String text = font.toUnicode(code, glyphList);
    if (text == null) {
        // Without a Unicode mapping Acrobat simply coerces the character code into Unicode,
        // so the same is done here for simple fonts. Subclasses of PDFStreamEngine don't
        // necessarily want this, which is why it is left until this point.
        if (!(font instanceof PDSimpleFont)) {
            // Acrobat doesn't seem to coerce composite fonts' character codes; it skips
            // them instead. See the "allah2.pdf" TestTextStripper file.
            return;
        }
        text = String.valueOf((char) code);
    }

    // adjust for the crop box if needed
    final Matrix positionMatrix;
    if (translateMatrix != null) {
        positionMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
        nextX -= pageSize.getLowerLeftX();
        nextY -= pageSize.getLowerLeftY();
    } else {
        positionMatrix = textRenderingMatrix;
    }

    final TextPosition position = new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(),
            positionMatrix, nextX, nextY, dyDisplay, dxDisplay, spaceWidthDisplay, text,
            new int[] { code }, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX()));
    processTextPosition(position);
}

From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java

License:Apache License

/**
 * Old version/*w ww  .jav  a 2s .  c  o  m*/
 */
public void processEncodedText(@NotNull byte[] string) throws IOException {

    /*
     *  Note on variable names.  There are three different units being used
     *     in this code.  Character sizes are given in glyph units, text locations
     *     are initially given in text units, and we want to save the data in
     *     display units. The variable names should end with Text or Disp to
     *     represent if the values are in text or disp units (no glyph units are saved).
     */
    final float fontSizeText = getGraphicsState().getTextState().getFontSize();
    final float horizontalScalingText = getGraphicsState().getTextState().getHorizontalScalingPercent() / 100f;

    // float verticalScalingText = horizontalScaling;//not sure if this is right but what else to
    // do???
    final float riseText = getGraphicsState().getTextState().getRise();
    final float wordSpacingText = getGraphicsState().getTextState().getWordSpacing();
    final float characterSpacingText = getGraphicsState().getTextState().getCharacterSpacing();

    /*
     *  We won't know the actual number of characters until
     * we process the byte data(could be two bytes each) but
     * it won't ever be more than string.length*2(there are some cases
     * were a single byte will result in two output characters "fi"
     */
    final PDFont font = getGraphicsState().getTextState().getFont();

    /*
     *  This will typically be 1000 but in the case of a type3 font this might be a different
     * number
     */
    final float glyphSpaceToTextSpaceFactor;

    if (font instanceof PDType3Font) {
        PDMatrix fontMatrix = font.getFontMatrix();
        float fontMatrixXScaling = fontMatrix.getValue(0, 0);

        glyphSpaceToTextSpaceFactor = 1.0f / fontMatrixXScaling;
    } else {
        glyphSpaceToTextSpaceFactor = /* 1.0f / */ 1000f;
    }

    float spaceWidthText = 0.0F;

    try {
        spaceWidthText = (font.getFontWidth(SPACE_BYTES, 0, 1) / glyphSpaceToTextSpaceFactor);
    } catch (Throwable exception) {
        log.warn(exception, exception);
    }

    if (spaceWidthText == 0.0F) {
        spaceWidthText = (font.getAverageFontWidth() / glyphSpaceToTextSpaceFactor);
        spaceWidthText *= .80f;
    }

    /* Convert textMatrix to display units */
    final Matrix initialMatrix = new Matrix();

    initialMatrix.setValue(0, 0, 1.0F);
    initialMatrix.setValue(0, 1, 0.0F);
    initialMatrix.setValue(0, 2, 0.0F);
    initialMatrix.setValue(1, 0, 0.0F);
    initialMatrix.setValue(1, 1, 1.0F);
    initialMatrix.setValue(1, 2, 0.0F);
    initialMatrix.setValue(2, 0, 0.0F);
    initialMatrix.setValue(2, 1, riseText);
    initialMatrix.setValue(2, 2, 1.0F);

    final Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
    final Matrix dispMatrix = initialMatrix.multiply(ctm);
    Matrix textMatrixStDisp = getTextMatrix().multiply(dispMatrix);
    final float xScaleDisp = textMatrixStDisp.getXScale();
    final float yScaleDisp = textMatrixStDisp.getYScale();
    final float spaceWidthDisp = spaceWidthText * xScaleDisp * fontSizeText;
    final float wordSpacingDisp = wordSpacingText * xScaleDisp * fontSizeText;
    float maxVerticalDisplacementText = 0.0F;
    StringBuilder characterBuffer = new StringBuilder(string.length);
    int codeLength = 1;

    for (int i = 0; i < string.length; i += codeLength) {

        // Decode the value to a Unicode character
        codeLength = 1;

        String c = font.encode(string, i, codeLength);

        if ((c == null) && (i + 1 < string.length)) {

            // maybe a multibyte encoding
            codeLength++;
            c = font.encode(string, i, codeLength);
        }

        c = inspectFontEncoding(c);

        // todo, handle horizontal displacement
        // get the width and height of this character in text units
        float fontWidth = font.getFontWidth(string, i, codeLength) * 0.95f;

        if (fontWidth == 0.0f) {
            fontWidth = spaceWidthDisp;
        }

        float characterHorizontalDisplacementText = (fontWidth / glyphSpaceToTextSpaceFactor);

        maxVerticalDisplacementText = Math.max(maxVerticalDisplacementText,
                font.getFontHeight(string, i, codeLength) / glyphSpaceToTextSpaceFactor);

        if (maxVerticalDisplacementText <= 0.0f) {
            maxVerticalDisplacementText = font.getFontBoundingBox().getHeight() / glyphSpaceToTextSpaceFactor;
        }

        /**
         * PDF Spec - 5.5.2 Word Spacing
         *
         * Word spacing works the same was as character spacing, but applies
         * only to the space character, code 32.
         *
         * Note: Word spacing is applied to every occurrence of the single-byte
         * character code 32 in a string.  This can occur when using a simple
         * font or a composite font that defines code 32 as a single-byte code.
         * It does not apply to occurrences of the byte value 32 in multiple-byte
         * codes.
         *
         * RDD - My interpretation of this is that only character code 32's that
         * encode to spaces should have word spacing applied.  Cases have been
         * observed where a font has a space character with a character code
         * other than 32, and where word spacing (Tw) was used.  In these cases,
         * applying word spacing to either the non-32 space or to the character
         * code 32 non-space resulted in errors consistent with this interpretation.
         */
        float spacingText = characterSpacingText;

        if ((string[i] == (byte) 0x20) && (codeLength == 1)) {
            spacingText += wordSpacingText;
        }

        /*
         *  The text matrix gets updated after each glyph is placed.  The updated
         *          version will have the X and Y coordinates for the next glyph.
         */
        Matrix glyphMatrixStDisp = getTextMatrix().multiply(dispMatrix);

        // The adjustment will always be zero.  The adjustment as shown in the
        // TJ operator will be handled separately.
        float adjustment = 0.0F;

        // TODO : tx should be set for horizontal text and ty for vertical text
        // which seems to be specified in the font (not the direction in the matrix).
        float tx = ((characterHorizontalDisplacementText - adjustment / glyphSpaceToTextSpaceFactor)
                * fontSizeText) * horizontalScalingText;
        Matrix td = new Matrix();

        td.setValue(2, 0, tx);

        float ty = 0.0F;

        td.setValue(2, 1, ty);
        setTextMatrix(td.multiply(getTextMatrix()));

        Matrix glyphMatrixEndDisp = getTextMatrix().multiply(dispMatrix);
        float sx = spacingText * horizontalScalingText;
        Matrix sd = new Matrix();

        sd.setValue(2, 0, sx);

        float sy = 0.0F;

        sd.setValue(2, 1, sy);
        setTextMatrix(sd.multiply(getTextMatrix()));

        float widthText = glyphMatrixEndDisp.getXPosition() - glyphMatrixStDisp.getXPosition();

        characterBuffer.append(c);

        Matrix textMatrixEndDisp = glyphMatrixEndDisp;
        float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText * yScaleDisp;

        try {
            final ETextPosition text = new ETextPosition(page, textMatrixStDisp, textMatrixEndDisp,
                    totalVerticalDisplacementDisp, new float[] { widthText }, spaceWidthDisp,
                    characterBuffer.toString(), font, fontSizeText,
                    (int) (fontSizeText * getTextMatrix().getXScale()), wordSpacingDisp);

            correctPosition(font, string, i, c, fontSizeText, glyphSpaceToTextSpaceFactor,
                    horizontalScalingText, codeLength, text);
            processTextPosition(text);
        } catch (Exception e) {
            log.warn("LOG00570:Error adding '" + characterBuffer + "': " + e.getMessage());
        }

        textMatrixStDisp = getTextMatrix().multiply(dispMatrix);
        characterBuffer.setLength(0);
    }
}