List of usage examples for com.itextpdf.text.pdf.parser.LineSegment#getEndPoint()
public Vector getEndPoint()
From source file:com.cyberninjas.pdf.TextChunkExtractionStrategy.java
License:Open Source License
@Override public void renderText(TextRenderInfo renderInfo) { LineSegment segment = renderInfo.getBaseline(); if (renderInfo.getRise() != 0) { // remove the rise from the baseline - we do this because the text from a super/subscript render operations should probably be considered as part of the baseline of the text the super/sub is relative to segment = segment.transformBy(new Matrix(0, -renderInfo.getRise())); }/* w w w .j a v a2 s . com*/ textChunks.add(new TextChunk(renderInfo.getText(), segment.getStartPoint(), segment.getEndPoint(), renderInfo.getSingleSpaceWidth())); }
From source file:com.uts.tradeconfo.MyTextExtractionStrategy.java
License:Open Source License
/** * Captures text using a simplified algorithm for inserting hard returns and spaces * @param renderInfo render info//from w w w .j a va 2s . com */ @Override public void renderText(TextRenderInfo renderInfo) { boolean firstRender = result.length() == 0; boolean hardReturn = false; LineSegment segment = renderInfo.getBaseline(); Vector start = segment.getStartPoint(); Vector end = segment.getEndPoint(); if (!firstRender) { Vector x0 = start; Vector x1 = lastStart; Vector x2 = lastEnd; // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html float dist = (x2.subtract(x1)).cross((x1.subtract(x0))).lengthSquared() / x2.subtract(x1).lengthSquared(); float sameLineThreshold = 1f; // we should probably base this on the current font metrics, but 1 pt seems to be sufficient for the time being if (dist > sameLineThreshold) hardReturn = true; // Note: Technically, we should check both the start and end positions, in case the angle of the text changed without any displacement // but this sort of thing probably doesn't happen much in reality, so we'll leave it alone for now } if (hardReturn) { //System.out.println("<< Hard Return >>"); appendTextChunk("\n"); } else if (!firstRender) { if (result.charAt(result.length() - 1) != ' ' && renderInfo.getText().length() > 0 && renderInfo.getText().charAt(0) != ' ') { // we only insert a blank space if the trailing character of the previous string wasn't a space, and the leading character of the current string isn't a space float spacing = lastEnd.subtract(start).length(); if (spacing > renderInfo.getSingleSpaceWidth() / 4f) { appendTextChunk(" "); //System.out.println("Inserting implied space before '" + renderInfo.getText() + "'"); } } } else { //System.out.println("Displaying first string of content '" + text + "' :: x1 = " + x1); } //System.out.println("[" + renderInfo.getStartPoint() + "]->[" + renderInfo.getEndPoint() + "] " + renderInfo.getText()); appendTextChunk(renderInfo.getText()); lastStart = start; 
lastEnd = end; }
From source file:DouDownloader.CustomLocationTextExtractionStrategy.java
License:Open Source License
/** * //from w ww .j a v a2s .c o m * @see com.itextpdf.text.pdf.parser.RenderListener#renderText(com.itextpdf.text.pdf.parser.TextRenderInfo) */ public void renderText(TextRenderInfo renderInfo) { LineSegment segment = renderInfo.getBaseline(); if (renderInfo.getRise() != 0) { // remove the rise from the baseline - // we do this because the text from // a // super/subscript render operations // should probably be considered as // part of // the baseline of the text the // super/sub is relative to Matrix riseOffsetTransform = new Matrix(0, -renderInfo.getRise()); segment = segment.transformBy(riseOffsetTransform); } TipoTexto tipo = TipoTexto.NORMAL; if (renderInfo.getFont().getFontDescriptor(DocumentFont.CAPHEIGHT, 1000) > 681) { /* * System.out.println("OPA!!!:-- " + renderInfo.getFont().getFontDescriptor( DocumentFont.CAPHEIGHT, 1000) + " x " + * renderInfo.getText()); */ tipo = TipoTexto.AUTONOMOS; } TextChunk location = new TextChunk(renderInfo.getText(), segment.getStartPoint(), segment.getEndPoint(), renderInfo.getSingleSpaceWidth(), tipo); locationalResult.add(location); }
From source file:mkl.testarea.itext5.pdfcleanup.PdfCleanUpRegionFilter.java
License:Open Source License
/** * Checks if the text is inside render filter region. *///from w ww . j a v a 2 s. c om @Override public boolean allowText(TextRenderInfo renderInfo) { LineSegment ascent = renderInfo.getAscentLine(); LineSegment descent = renderInfo.getDescentLine(); Point2D[] glyphRect = new Point2D[] { new Point2D.Float(ascent.getStartPoint().get(0), ascent.getStartPoint().get(1)), new Point2D.Float(ascent.getEndPoint().get(0), ascent.getEndPoint().get(1)), new Point2D.Float(descent.getEndPoint().get(0), descent.getEndPoint().get(1)), new Point2D.Float(descent.getStartPoint().get(0), descent.getStartPoint().get(1)), }; for (Rectangle rectangle : rectangles) { Point2D[] redactRect = getVertices(rectangle); if (intersect(glyphRect, redactRect)) { return false; } } return true; }
From source file:mkl.testarea.itext5.pdfcleanup.PdfCleanUpRenderListener.java
License:Open Source License
public void renderText(TextRenderInfo renderInfo) { if (renderInfo.getPdfString().toUnicodeString().length() == 0) { return;// ww w.j ava 2 s . c o m } for (TextRenderInfo ri : renderInfo.getCharacterRenderInfos()) { boolean isAllowed = filter.allowText(ri); LineSegment baseline = ri.getUnscaledBaseline(); chunks.add(new PdfCleanUpContentChunk.Text(ri.getPdfString(), baseline.getStartPoint(), baseline.getEndPoint(), isAllowed, strNumber)); } ++strNumber; }
From source file:org.mortagne.budget.internal.transaction.io.lcl.pdf.LCLLocationTextExtractionStrategy.java
License:Open Source License
/**
 * Adds the rendered text to {@code locationalResult} as a {@code TextChunk} built from
 * the text, the start and end points of its baseline, and the single-space width.
 *
 * @param renderInfo details of the text render operation being processed
 * @see com.itextpdf.text.pdf.parser.RenderListener#renderText(com.itextpdf.text.pdf.parser.TextRenderInfo)
 */
public void renderText(TextRenderInfo renderInfo) {
    LineSegment baseline = renderInfo.getBaseline();
    this.locationalResult.add(
            new TextChunk(
                    renderInfo.getText(),
                    baseline.getStartPoint(),
                    baseline.getEndPoint(),
                    renderInfo.getSingleSpaceWidth()));
}