List of usage examples for com.itextpdf.text.pdf.parser.TextRenderInfo.getBaseline()
public LineSegment getBaseline()
From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java
License:Open Source License
public void renderText(TextRenderInfo renderInfo) { String text = renderInfo.getText(); if (renderInfo.getText().equals("\u00d5")) { text = "\u0150"; }//from ww w .j a v a 2 s. c o m if (renderInfo.getText().equals("\u00f5")) { text = "\u0151"; } if (renderInfo.getText().equals("\u00db")) { text = "\u0170"; } if (renderInfo.getText().equals("\u00ff")) { text = "\u0171"; } if (!map.containsKey(actualPage)) { map.put(actualPage, new TreeMap<Float, StringBuffer>(java.util.Collections.reverseOrder())); } if (!map.get(actualPage).containsKey(renderInfo.getBaseline().getStartPoint().get(Vector.I2))) { map.get(actualPage).put(renderInfo.getBaseline().getStartPoint().get(Vector.I2), new StringBuffer(text)); } else { map.get(actualPage).get(renderInfo.getBaseline().getStartPoint().get(Vector.I2)).append(text); } }
From source file:com.cyberninjas.pdf.TextChunkExtractionStrategy.java
License:Open Source License
@Override public void renderText(TextRenderInfo renderInfo) { LineSegment segment = renderInfo.getBaseline(); if (renderInfo.getRise() != 0) { // remove the rise from the baseline - we do this because the text from a super/subscript render operations should probably be considered as part of the baseline of the text the super/sub is relative to segment = segment.transformBy(new Matrix(0, -renderInfo.getRise())); }/* w w w.ja v a2 s . co m*/ textChunks.add(new TextChunk(renderInfo.getText(), segment.getStartPoint(), segment.getEndPoint(), renderInfo.getSingleSpaceWidth())); }
From source file:com.uts.tradeconfo.MyTextExtractionStrategy.java
License:Open Source License
/** * Captures text using a simplified algorithm for inserting hard returns and spaces * @param renderInfo render info/*from ww w . j a v a 2s.c o m*/ */ @Override public void renderText(TextRenderInfo renderInfo) { boolean firstRender = result.length() == 0; boolean hardReturn = false; LineSegment segment = renderInfo.getBaseline(); Vector start = segment.getStartPoint(); Vector end = segment.getEndPoint(); if (!firstRender) { Vector x0 = start; Vector x1 = lastStart; Vector x2 = lastEnd; // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html float dist = (x2.subtract(x1)).cross((x1.subtract(x0))).lengthSquared() / x2.subtract(x1).lengthSquared(); float sameLineThreshold = 1f; // we should probably base this on the current font metrics, but 1 pt seems to be sufficient for the time being if (dist > sameLineThreshold) hardReturn = true; // Note: Technically, we should check both the start and end positions, in case the angle of the text changed without any displacement // but this sort of thing probably doesn't happen much in reality, so we'll leave it alone for now } if (hardReturn) { //System.out.println("<< Hard Return >>"); appendTextChunk("\n"); } else if (!firstRender) { if (result.charAt(result.length() - 1) != ' ' && renderInfo.getText().length() > 0 && renderInfo.getText().charAt(0) != ' ') { // we only insert a blank space if the trailing character of the previous string wasn't a space, and the leading character of the current string isn't a space float spacing = lastEnd.subtract(start).length(); if (spacing > renderInfo.getSingleSpaceWidth() / 4f) { appendTextChunk(" "); //System.out.println("Inserting implied space before '" + renderInfo.getText() + "'"); } } } else { //System.out.println("Displaying first string of content '" + text + "' :: x1 = " + x1); } //System.out.println("[" + renderInfo.getStartPoint() + "]->[" + renderInfo.getEndPoint() + "] " + renderInfo.getText()); appendTextChunk(renderInfo.getText()); lastStart = start; 
lastEnd = end; }
From source file:DouDownloader.CustomLocationTextExtractionStrategy.java
License:Open Source License
/** * /*from ww w .j a va2 s .c o m*/ * @see com.itextpdf.text.pdf.parser.RenderListener#renderText(com.itextpdf.text.pdf.parser.TextRenderInfo) */ public void renderText(TextRenderInfo renderInfo) { LineSegment segment = renderInfo.getBaseline(); if (renderInfo.getRise() != 0) { // remove the rise from the baseline - // we do this because the text from // a // super/subscript render operations // should probably be considered as // part of // the baseline of the text the // super/sub is relative to Matrix riseOffsetTransform = new Matrix(0, -renderInfo.getRise()); segment = segment.transformBy(riseOffsetTransform); } TipoTexto tipo = TipoTexto.NORMAL; if (renderInfo.getFont().getFontDescriptor(DocumentFont.CAPHEIGHT, 1000) > 681) { /* * System.out.println("OPA!!!:-- " + renderInfo.getFont().getFontDescriptor( DocumentFont.CAPHEIGHT, 1000) + " x " + * renderInfo.getText()); */ tipo = TipoTexto.AUTONOMOS; } TextChunk location = new TextChunk(renderInfo.getText(), segment.getStartPoint(), segment.getEndPoint(), renderInfo.getSingleSpaceWidth(), tipo); locationalResult.add(location); }
From source file:org.mortagne.budget.internal.transaction.io.lcl.pdf.LCLLocationTextExtractionStrategy.java
License:Open Source License
/** * @see com.itextpdf.text.pdf.parser.RenderListener#renderText(com.itextpdf.text.pdf.parser.TextRenderInfo) *///w ww .j a va2 s . com public void renderText(TextRenderInfo renderInfo) { LineSegment segment = renderInfo.getBaseline(); TextChunk location = new TextChunk(renderInfo.getText(), segment.getStartPoint(), segment.getEndPoint(), renderInfo.getSingleSpaceWidth()); this.locationalResult.add(location); }
From source file:pdf_text_extract.DumpTextFragmentPositions.java
License:Open Source License
/**
 * Writes one line to {@code out} for the rendered text: components 0 and 1 of
 * the baseline's start point, each formatted without decimals in a field of
 * width 4, followed by the text itself.
 *
 * @param renderInfo the text render operation reported by the parser
 */
@Override
public void renderText(TextRenderInfo renderInfo) {
    final LineSegment baseline = renderInfo.getBaseline();
    final float first = baseline.getStartPoint().get(0);
    final float second = baseline.getStartPoint().get(1);
    out.format("%4.0f,%4.0f,%s\n", first, second, renderInfo.getText());
}