Usage examples for the Vector constructor of class com.itextpdf.text.pdf.parser.Vector
public Vector(final float x, final float y, final float z)
From source file: com.cyberninjas.invoice.pdf.InvoiceTextExtractionStrategy.java
License: Open Source License
/** * Parses the text with a PDF based on the given settings. * * @param settings settings used to parse the document. *///from w ww . j a va 2 s .c om public void parse(PdfInvoiceSettings settings) { cumulativeCostLocationMap = new HashMap(); cumulativeCostSubtotalLocation = null; totalFundedAmountLocation = null; // locate the cumulative cost heading for aligning cumulative cost amounts TextChunk cumulativeCostHeadingTextChunk = matchText(settings.getCumulativeCostHeadingText()); if (cumulativeCostHeadingTextChunk == null) { log.warn("Failed to locate the cumulative cost heading based on the text [" + settings.getCumulativeCostHeadingText() + "]"); } // locate the ItemIds and their containing rows StringBuilder sb = new StringBuilder(); TextChunk lastChunk = null; String lastItemId = null; for (TextChunk chunk : getTextChunks()) { if (lastChunk == null) { sb.append(chunk.getText()); } else { if (chunk.sameLine(lastChunk)) { // we only insert a blank space if the trailing character of the previous string wasn't a space, and the leading character of the current string isn't a space if (isChunkAtWordBoundary(chunk, lastChunk) && !startsWithSpace(chunk.getText()) && !endsWithSpace(lastChunk.getText())) { sb.append(' '); } sb.append(chunk.getText()); } else { if (sb.indexOf(settings.getItemIdSeparator()) > 0) { String itemId = sb.substring(0, sb.indexOf(settings.getItemIdSeparator())); cumulativeCostLocationMap.put(itemId, null); lastItemId = itemId; } if (lastItemId != null && sb.toString().matches(settings.getItemRowPattern())) { cumulativeCostLocationMap.put(lastItemId, new Vector(cumulativeCostHeadingTextChunk.getEndLocation().get(Vector.I1), lastChunk.getEndLocation().get(Vector.I2), 0)); lastItemId = null; } sb = new StringBuilder(); sb.append(chunk.getText()); } } lastChunk = chunk; } // check if all the ItemId rows have been located - if not, position based on the location of the ItemId text cumulativeCostLocationMap.keySet().stream() .filter((itemId) -> 
(cumulativeCostLocationMap.get(itemId) == null)).forEach((itemId) -> { TextChunk itemIdTextChunk = this.matchText(itemId); if (itemIdTextChunk != null) { cumulativeCostLocationMap.replace(itemId, new Vector(cumulativeCostHeadingTextChunk.getEndLocation().get(Vector.I1), itemIdTextChunk.getEndLocation().get(Vector.I2), 0)); } else { log.warn("Failed to locate row for itemId [" + itemId + "]"); } }); // locate where to write the cumulative cost subtotal TextChunk subTotalLabel = matchText(settings.getSubtotalLabelText()); if (subTotalLabel != null) { cumulativeCostSubtotalLocation = new Vector( cumulativeCostHeadingTextChunk.getEndLocation().get(Vector.I1), subTotalLabel.getEndLocation().get(Vector.I2), 0); } // locate where to write the total funded amount TextChunk totalFundedAmountLabel = matchText(settings.getTotalFundedAmountLabelText()); if (totalFundedAmountLabel != null) { totalFundedAmountLocation = new Vector( totalFundedAmountLabel.getEndLocation().get(Vector.I1) + settings.getTotalFundedAmountOffset(), totalFundedAmountLabel.getEndLocation().get(Vector.I2), 0); } }