List of usage examples for org.apache.pdfbox.text TextPosition getTextMatrix
public Matrix getTextMatrix()
From source file:com.formkiq.core.service.generator.pdfbox.TextToPDFieldMapper.java
License:Apache License
/**
 * Splits a run of text positions into groups wherever two neighbouring characters are
 * horizontally too far apart.
 *
 * <p>A new group starts at a character whose text-matrix X translation is greater than the
 * previous character's X translation plus its width plus a fixed tolerance of {@code 5}.
 *
 * @param textPositions {@link List} of {@link TextPosition}
 * @return {@link List} of {@link List} of {@link TextPosition}; the groups are independent
 *     copies in input order, and the result always contains at least one (possibly empty) group
 */
private List<List<TextPosition>> split(final List<TextPosition> textPositions) {
    final int fudgewidth = 5;
    final int size = textPositions.size();
    final List<List<TextPosition>> list = new ArrayList<>();

    // Index of the first element of the group currently being collected.
    int start = 0;

    for (int i = 1; i < size; i++) {
        TextPosition p = textPositions.get(i - 1);
        TextPosition c = textPositions.get(i);

        float cpos = c.getTextMatrix().getTranslateX();
        float ppos = p.getTextMatrix().getTranslateX() + p.getWidth() + fudgewidth;

        if (cpos > ppos) {
            // Copy the contiguous range in one step instead of removing the elements one by
            // one from a shadow list (which needed an equality scan plus a shift per element).
            List<TextPosition> group = new ArrayList<>(textPositions.subList(start, i));
            list.add(group);
            start = i;
        }
    }

    // Whatever follows the last gap (or the whole input if there was no gap) is the last group.
    List<TextPosition> remainder = new ArrayList<>(textPositions.subList(start, size));
    list.add(remainder);
    return list;
}
From source file:com.trollworks.gcs.pdfview.PdfRenderer.java
License:Open Source License
@Override protected void writeString(String text, List<TextPosition> textPositions) throws IOException { text = text.toLowerCase();//w w w . j ava2 s. c om int index = text.indexOf(mTextToHighlight); if (index != -1) { PDPage currentPage = getCurrentPage(); PDRectangle pageBoundingBox = currentPage.getBBox(); AffineTransform flip = new AffineTransform(); flip.translate(0, pageBoundingBox.getHeight()); flip.scale(1, -1); PDRectangle mediaBox = currentPage.getMediaBox(); float mediaHeight = mediaBox.getHeight(); float mediaWidth = mediaBox.getWidth(); int size = textPositions.size(); while (index != -1) { int last = index + mTextToHighlight.length() - 1; for (int i = index; i <= last; i++) { TextPosition pos = textPositions.get(i); PDFont font = pos.getFont(); BoundingBox bbox = font.getBoundingBox(); Rectangle2D.Float rect = new Rectangle2D.Float(0, bbox.getLowerLeftY(), font.getWidth(pos.getCharacterCodes()[0]), bbox.getHeight()); AffineTransform at = pos.getTextMatrix().createAffineTransform(); if (font instanceof PDType3Font) { at.concatenate(font.getFontMatrix().createAffineTransform()); } else { at.scale(1 / 1000f, 1 / 1000f); } Shape shape = flip.createTransformedShape(at.createTransformedShape(rect)); AffineTransform transform = mGC.getTransform(); int rotation = currentPage.getRotation(); if (rotation != 0) { switch (rotation) { case 90: mGC.translate(mediaHeight, 0); break; case 270: mGC.translate(0, mediaWidth); break; case 180: mGC.translate(mediaWidth, mediaHeight); break; default: break; } mGC.rotate(Math.toRadians(rotation)); } mGC.fill(shape); if (rotation != 0) { mGC.setTransform(transform); } } index = last < size - 1 ? text.indexOf(mTextToHighlight, last + 1) : -1; } } }
From source file:com.yiyihealth.tools.test.DrawPrintTextLocations.java
License:Apache License
/** * Override the default functionality of PDFTextStripper. *//*w w w . j a va 2 s . c o m*/ @Override protected void writeString(String string, List<TextPosition> textPositions) throws IOException { for (TextPosition text : textPositions) { System.out.println("String[" + text.getXDirAdj() + "," + text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" + text.getXScale() + " height=" + text.getHeightDir() + " space=" + text.getWidthOfSpace() + " width=" + text.getWidthDirAdj() + "]" + text.getUnicode()); // in red: // show rectangles with the "height" (not a real height, but used for text extraction // heuristics, it is 1/2 of the bounding box height and starts at y=0) Rectangle2D.Float rect = new Rectangle2D.Float(text.getXDirAdj(), (text.getYDirAdj() - text.getHeightDir()), text.getWidthDirAdj(), text.getHeightDir()); g2d.setColor(Color.red); g2d.draw(rect); // in blue: // show rectangle with the real vertical bounds, based on the font bounding box y values // usually, the height is identical to what you see when marking text in Adobe Reader PDFont font = text.getFont(); BoundingBox bbox = font.getBoundingBox(); // advance width, bbox height (glyph space) float xadvance = font.getWidth(text.getCharacterCodes()[0]); // todo: should iterate all chars rect = new Rectangle2D.Float(0, bbox.getLowerLeftY(), xadvance, bbox.getHeight()); // glyph space -> user space // note: text.getTextMatrix() is *not* the Text Matrix, it's the Text Rendering Matrix AffineTransform at = text.getTextMatrix().createAffineTransform(); if (font instanceof PDType3Font) { // bbox and font matrix are unscaled at.concatenate(font.getFontMatrix().createAffineTransform()); } else { // bbox and font matrix are already scaled to 1000 at.scale(1 / 1000f, 1 / 1000f); } Shape s = at.createTransformedShape(rect); s = flipAT.createTransformedShape(s); s = rotateAT.createTransformedShape(s); g2d.setColor(Color.blue); g2d.draw(s); } }
From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java
License:Apache License
/** * Override the default functionality of PDFTextStripper. *//* w ww.j a v a 2 s . c o m*/ @Override protected void writeString(String string, List<TextPosition> textPositions) throws IOException { for (TextPosition text : textPositions) { System.out.println("String[" + text.getXDirAdj() + "," + text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" + text.getXScale() + " height=" + text.getHeightDir() + " space=" + text.getWidthOfSpace() + " width=" + text.getWidthDirAdj() + "]" + text.getUnicode()); // in red: // show rectangles with the "height" (not a real height, but used for text extraction // heuristics, it is 1/2 of the bounding box height and starts at y=0) Rectangle2D.Float rect = new Rectangle2D.Float(text.getXDirAdj(), (text.getYDirAdj() - text.getHeightDir()), text.getWidthDirAdj(), text.getHeightDir()); g2d.setColor(Color.red); g2d.draw(rect); // in blue: // show rectangle with the real vertical bounds, based on the font bounding box y values // usually, the height is identical to what you see when marking text in Adobe Reader PDFont font = text.getFont(); BoundingBox bbox = font.getBoundingBox(); // advance width, bbox height (glyph space) float xadvance = font.getWidth(text.getCharacterCodes()[0]); // todo: should iterate all chars rect = new Rectangle2D.Float(0, bbox.getLowerLeftY(), xadvance, bbox.getHeight()); // glyph space -> user space // note: text.getTextMatrix() is *not* the Text Matrix, it's the Text Rendering Matrix AffineTransform at = text.getTextMatrix().createAffineTransform(); if (font instanceof PDType3Font) { // bbox and font matrix are unscaled at.concatenate(font.getFontMatrix().createAffineTransform()); } else { // bbox and font matrix are already scaled to 1000 at.scale(1 / 1000f, 1 / 1000f); } Shape s = at.createTransformedShape(rect); s = flipAT.createTransformedShape(s); s = rotateAT.createTransformedShape(s); g2d.setColor(Color.blue); g2d.draw(s); } }
From source file:helper.pdfpreprocessing.pdf.TextHighlight.java
License:Apache License
/** * Computes a series of bounding boxes (PDRectangle) from a list of TextPositions. It will create a new bounding box * if the vertical tolerance is exceeded * * @param positions/* w ww . j a v a 2 s . c o m*/ * @throws IOException */ public List<PDRectangle> getTextBoundingBoxes(final List<TextPosition> positions) { final List<PDRectangle> boundingBoxes = new ArrayList<>(); float lowerLeftX = -1, lowerLeftY = -1, upperRightX = -1, upperRightY = -1; boolean first = true; for (final TextPosition position : positions) { if (position == null) { continue; } final Matrix textPos = position.getTextMatrix(); final float height = position.getHeight() * getHeightModifier(); if (first) { lowerLeftX = textPos.getTranslateX(); upperRightX = lowerLeftX + position.getWidth(); lowerLeftY = textPos.getTranslateY(); upperRightY = lowerLeftY + height; first = false; continue; } // we are still on the same line if (Math.abs(textPos.getTranslateY() - lowerLeftY) <= getVerticalTolerance()) { upperRightX = textPos.getTranslateX() + position.getWidth(); upperRightY = textPos.getTranslateY() + height; } else { final PDRectangle boundingBox = boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY); boundingBoxes.add(boundingBox); // new line lowerLeftX = textPos.getTranslateX(); upperRightX = lowerLeftX + position.getWidth(); lowerLeftY = textPos.getTranslateY(); upperRightY = lowerLeftY + height; } } if (!(lowerLeftX == -1 && lowerLeftY == -1 && upperRightX == -1 && upperRightY == -1)) { final PDRectangle boundingBox = boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY); boundingBoxes.add(boundingBox); } return boundingBoxes; }