List of usage examples for org.apache.pdfbox.pdmodel.font.PDFont#getAverageFontWidth()
@Override public float getAverageFontWidth()
From source file: com.repeatability.pdf.PDFTextStreamEngine.java
License: Apache License
/** * This method was originally written by Ben Litchfield for PDFStreamEngine. *//*from ww w .j a v a 2 s .c o m*/ @Override protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException { // // legacy calculations which were previously in PDFStreamEngine // PDGraphicsState state = getGraphicsState(); Matrix ctm = state.getCurrentTransformationMatrix(); float fontSize = state.getTextState().getFontSize(); float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; Matrix textMatrix = getTextMatrix(); BoundingBox bbox = font.getBoundingBox(); if (bbox.getLowerLeftY() < Short.MIN_VALUE) { // PDFBOX-2158 and PDFBOX-3130 // files by Salmat eSolutions / ClibPDF Library bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536)); } // 1/2 the bbox is used as the height todo: why? float glyphHeight = bbox.getHeight() / 2; // sometimes the bbox has very high values, but CapHeight is OK PDFontDescriptor fontDescriptor = font.getFontDescriptor(); if (fontDescriptor != null) { float capHeight = fontDescriptor.getCapHeight(); if (capHeight != 0 && capHeight < glyphHeight) { glyphHeight = capHeight; } } // transformPoint from glyph space -> text space float height; if (font instanceof PDType3Font) { height = font.getFontMatrix().transformPoint(0, glyphHeight).y; } else { height = glyphHeight / 1000; } float displacementX = displacement.getX(); // the sorting algorithm is based on the width of the character. 
As the displacement // for vertical characters doesn't provide any suitable value for it, we have to // calculate our own if (font.isVertical()) { displacementX = font.getWidth(code) / 1000; // there may be an additional scaling factor for true type fonts TrueTypeFont ttf = null; if (font instanceof PDTrueTypeFont) { ttf = ((PDTrueTypeFont) font).getTrueTypeFont(); } else if (font instanceof PDType0Font) { PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont(); if (cidFont instanceof PDCIDFontType2) { ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont(); } } if (ttf != null && ttf.getUnitsPerEm() != 1000) { displacementX *= 1000f / ttf.getUnitsPerEm(); } } // (modified) combined displacement, this is calculated *without* taking the character // spacing and word spacing into account, due to legacy code in TextStripper float tx = displacementX * fontSize * horizontalScaling; float ty = displacement.getY() * fontSize; // (modified) combined displacement matrix Matrix td = Matrix.getTranslateInstance(tx, ty); // (modified) text rendering matrix Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space float nextX = nextTextRenderingMatrix.getTranslateX(); float nextY = nextTextRenderingMatrix.getTranslateY(); // (modified) width and height calculations float dxDisplay = nextX - textRenderingMatrix.getTranslateX(); float dyDisplay = height * textRenderingMatrix.getScalingFactorY(); // // start of the original method // // Note on variable names. There are three different units being used in this code. // Character sizes are given in glyph units, text locations are initially given in text // units, and we want to save the data in display units. The variable names should end with // Text or Disp to represent if the values are in text or disp units (no glyph units are // saved). 
float glyphSpaceToTextSpaceFactor = 1 / 1000f; if (font instanceof PDType3Font) { glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX(); } float spaceWidthText = 0; try { // to avoid crash as described in PDFBOX-614, see what the space displacement should be spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor; } catch (Throwable exception) { LOG.warn(exception, exception); } if (spaceWidthText == 0) { spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor; // the average space width appears to be higher than necessary so make it smaller spaceWidthText *= .80f; } if (spaceWidthText == 0) { spaceWidthText = 1.0f; // if could not find font, use a generic value } // the space width has to be transformed into display units float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX(); // use our additional glyph list for Unicode mapping unicode = font.toUnicode(code, glyphList); // when there is no Unicode mapping available, Acrobat simply coerces the character code // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want // this, which is why we leave it until this point in PDFTextStreamEngine. if (unicode == null) { if (font instanceof PDSimpleFont) { char c = (char) code; unicode = new String(new char[] { c }); } else { // Acrobat doesn't seem to coerce composite font's character codes, instead it // skips them. See the "allah2.pdf" TestTextStripper file. 
return; } } // adjust for cropbox if needed Matrix translatedTextRenderingMatrix; if (translateMatrix == null) { translatedTextRenderingMatrix = textRenderingMatrix; } else { translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix); nextX -= pageSize.getLowerLeftX(); nextY -= pageSize.getLowerLeftY(); } processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, dyDisplay, dxDisplay, spaceWidthDisplay, unicode, new int[] { code }, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX()))); }
From source file: org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java
License: Apache License
/** * Old version/*w ww .jav a 2s . c o m*/ */ public void processEncodedText(@NotNull byte[] string) throws IOException { /* * Note on variable names. There are three different units being used * in this code. Character sizes are given in glyph units, text locations * are initially given in text units, and we want to save the data in * display units. The variable names should end with Text or Disp to * represent if the values are in text or disp units (no glyph units are saved). */ final float fontSizeText = getGraphicsState().getTextState().getFontSize(); final float horizontalScalingText = getGraphicsState().getTextState().getHorizontalScalingPercent() / 100f; // float verticalScalingText = horizontalScaling;//not sure if this is right but what else to // do??? final float riseText = getGraphicsState().getTextState().getRise(); final float wordSpacingText = getGraphicsState().getTextState().getWordSpacing(); final float characterSpacingText = getGraphicsState().getTextState().getCharacterSpacing(); /* * We won't know the actual number of characters until * we process the byte data(could be two bytes each) but * it won't ever be more than string.length*2(there are some cases * were a single byte will result in two output characters "fi" */ final PDFont font = getGraphicsState().getTextState().getFont(); /* * This will typically be 1000 but in the case of a type3 font this might be a different * number */ final float glyphSpaceToTextSpaceFactor; if (font instanceof PDType3Font) { PDMatrix fontMatrix = font.getFontMatrix(); float fontMatrixXScaling = fontMatrix.getValue(0, 0); glyphSpaceToTextSpaceFactor = 1.0f / fontMatrixXScaling; } else { glyphSpaceToTextSpaceFactor = /* 1.0f / */ 1000f; } float spaceWidthText = 0.0F; try { spaceWidthText = (font.getFontWidth(SPACE_BYTES, 0, 1) / glyphSpaceToTextSpaceFactor); } catch (Throwable exception) { log.warn(exception, exception); } if (spaceWidthText == 0.0F) { spaceWidthText = (font.getAverageFontWidth() / 
glyphSpaceToTextSpaceFactor); spaceWidthText *= .80f; } /* Convert textMatrix to display units */ final Matrix initialMatrix = new Matrix(); initialMatrix.setValue(0, 0, 1.0F); initialMatrix.setValue(0, 1, 0.0F); initialMatrix.setValue(0, 2, 0.0F); initialMatrix.setValue(1, 0, 0.0F); initialMatrix.setValue(1, 1, 1.0F); initialMatrix.setValue(1, 2, 0.0F); initialMatrix.setValue(2, 0, 0.0F); initialMatrix.setValue(2, 1, riseText); initialMatrix.setValue(2, 2, 1.0F); final Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); final Matrix dispMatrix = initialMatrix.multiply(ctm); Matrix textMatrixStDisp = getTextMatrix().multiply(dispMatrix); final float xScaleDisp = textMatrixStDisp.getXScale(); final float yScaleDisp = textMatrixStDisp.getYScale(); final float spaceWidthDisp = spaceWidthText * xScaleDisp * fontSizeText; final float wordSpacingDisp = wordSpacingText * xScaleDisp * fontSizeText; float maxVerticalDisplacementText = 0.0F; StringBuilder characterBuffer = new StringBuilder(string.length); int codeLength = 1; for (int i = 0; i < string.length; i += codeLength) { // Decode the value to a Unicode character codeLength = 1; String c = font.encode(string, i, codeLength); if ((c == null) && (i + 1 < string.length)) { // maybe a multibyte encoding codeLength++; c = font.encode(string, i, codeLength); } c = inspectFontEncoding(c); // todo, handle horizontal displacement // get the width and height of this character in text units float fontWidth = font.getFontWidth(string, i, codeLength) * 0.95f; if (fontWidth == 0.0f) { fontWidth = spaceWidthDisp; } float characterHorizontalDisplacementText = (fontWidth / glyphSpaceToTextSpaceFactor); maxVerticalDisplacementText = Math.max(maxVerticalDisplacementText, font.getFontHeight(string, i, codeLength) / glyphSpaceToTextSpaceFactor); if (maxVerticalDisplacementText <= 0.0f) { maxVerticalDisplacementText = font.getFontBoundingBox().getHeight() / glyphSpaceToTextSpaceFactor; } /** * PDF Spec - 5.5.2 Word Spacing * 
* Word spacing works the same was as character spacing, but applies * only to the space character, code 32. * * Note: Word spacing is applied to every occurrence of the single-byte * character code 32 in a string. This can occur when using a simple * font or a composite font that defines code 32 as a single-byte code. * It does not apply to occurrences of the byte value 32 in multiple-byte * codes. * * RDD - My interpretation of this is that only character code 32's that * encode to spaces should have word spacing applied. Cases have been * observed where a font has a space character with a character code * other than 32, and where word spacing (Tw) was used. In these cases, * applying word spacing to either the non-32 space or to the character * code 32 non-space resulted in errors consistent with this interpretation. */ float spacingText = characterSpacingText; if ((string[i] == (byte) 0x20) && (codeLength == 1)) { spacingText += wordSpacingText; } /* * The text matrix gets updated after each glyph is placed. The updated * version will have the X and Y coordinates for the next glyph. */ Matrix glyphMatrixStDisp = getTextMatrix().multiply(dispMatrix); // The adjustment will always be zero. The adjustment as shown in the // TJ operator will be handled separately. float adjustment = 0.0F; // TODO : tx should be set for horizontal text and ty for vertical text // which seems to be specified in the font (not the direction in the matrix). 
float tx = ((characterHorizontalDisplacementText - adjustment / glyphSpaceToTextSpaceFactor) * fontSizeText) * horizontalScalingText; Matrix td = new Matrix(); td.setValue(2, 0, tx); float ty = 0.0F; td.setValue(2, 1, ty); setTextMatrix(td.multiply(getTextMatrix())); Matrix glyphMatrixEndDisp = getTextMatrix().multiply(dispMatrix); float sx = spacingText * horizontalScalingText; Matrix sd = new Matrix(); sd.setValue(2, 0, sx); float sy = 0.0F; sd.setValue(2, 1, sy); setTextMatrix(sd.multiply(getTextMatrix())); float widthText = glyphMatrixEndDisp.getXPosition() - glyphMatrixStDisp.getXPosition(); characterBuffer.append(c); Matrix textMatrixEndDisp = glyphMatrixEndDisp; float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText * yScaleDisp; try { final ETextPosition text = new ETextPosition(page, textMatrixStDisp, textMatrixEndDisp, totalVerticalDisplacementDisp, new float[] { widthText }, spaceWidthDisp, characterBuffer.toString(), font, fontSizeText, (int) (fontSizeText * getTextMatrix().getXScale()), wordSpacingDisp); correctPosition(font, string, i, c, fontSizeText, glyphSpaceToTextSpaceFactor, horizontalScalingText, codeLength, text); processTextPosition(text); } catch (Exception e) { log.warn("LOG00570:Error adding '" + characterBuffer + "': " + e.getMessage()); } textMatrixStDisp = getTextMatrix().multiply(dispMatrix); characterBuffer.setLength(0); } }