List of usage examples for com.itextpdf.text.pdf.parser PdfReaderContentParser processContent
public <E extends RenderListener> E processContent(int pageNumber, E renderListener) throws IOException
From source file:be.roots.taconic.pricingguide.util.iTextUtil.java
License:Open Source License
/**
 * Builds an {@link Image} from the first page of a PDF.
 *
 * <p>Page 1 is run through an {@code ImageRenderListener} that is constructed around an
 * in-memory buffer; the buffer's bytes are then handed to {@code Image.getInstance}.
 * NOTE(review): what exactly the listener writes into the buffer is defined elsewhere.</p>
 *
 * @param pdf the PDF document as a byte array
 * @return the image created from the bytes collected while processing page 1
 * @throws IOException         if the PDF cannot be read or processed
 * @throws BadElementException declared for {@code Image.getInstance}
 */
public static Image getImageFromPdf(byte[] pdf) throws IOException, BadElementException {
    try (final ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
        final PdfReader reader = new PdfReader(pdf);
        try {
            final PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            final ImageRenderListener listener = new ImageRenderListener(bos);
            parser.processContent(1, listener);
        } finally {
            // Close the reader even when content processing fails.
            reader.close();
        }
        return Image.getInstance(bos.toByteArray());
    }
}
From source file:br.com.smarttaco.util.HelenaBarbosa.java
/** * pdf2txt/*from w w w . j av a 2 s. com*/ * * @param pdf * @param paginas se for <code>null</code> realiza leitura completa. * @param txt * @throws FileNotFoundException * @throws IOException */ private static void pdf2txt(final String pdf, List<Integer> paginas, final String txt) throws FileNotFoundException, IOException { PdfReader reader = new PdfReader(pdf); //System.out.println(reader.getInfo().toString()); if (paginas != null) { reader.selectPages(paginas); } PdfReaderContentParser parser = new PdfReaderContentParser(reader); PrintWriter out = new PrintWriter(txt, "UTF-8"); TextExtractionStrategy strategy; for (int i = 1; i <= reader.getNumberOfPages(); i++) { strategy = parser.processContent(i, new SimpleTextExtractionStrategy()); out.println(strategy.getResultantText()); } out.flush(); out.close(); reader.close(); }
From source file:ch.kostceco.tools.kostsimy.comparison.modulepdfa.impl.PdfExtractJpegModuleImpl.java
License:Open Source License
/**
 * Parses a PDF and passes every page to a {@code MyImageRenderListener}.
 *
 * <p>The four image counters (JPEG, CCITT, JP2, JBIG2) are reset to zero first. An
 * {@code IOException} raised while reading or processing the PDF is not propagated; it
 * is reported through the message service instead.</p>
 *
 * @param srcPdf    the source PDF
 * @param destImage the destination handed to the render listener
 *                  (NOTE(review): presumably the path of the resulting image; confirm
 *                  against {@code MyImageRenderListener})
 * @throws IOException       declared, but I/O errors from reading the PDF are logged, not thrown
 * @throws DocumentException declared by the signature
 */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    jpegCounter = 0;
    ccittCounter = 0;
    jp2Counter = 0;
    jbig2Counter = 0;
    try {
        PdfReader reader = new PdfReader(srcPdf);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            MyImageRenderListener listener = new MyImageRenderListener(destImage);
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                parser.processContent(i, listener);
            }
        } finally {
            // Close the reader even when a page fails to process.
            reader.close();
        }
    } catch (IOException e) {
        getMessageService().logError(getTextResourceService().getText(MESSAGE_XML_MODUL_PDF_EXTRACT)
                + getTextResourceService().getText(ERROR_XML_UNKNOWN, e.getMessage()));
    }
}
From source file:ch.kostceco.tools.kostval.validation.modulepdfa.impl.ValidationJimageValidationModuleImpl.java
License:Open Source License
/**
 * Parses a PDF and passes every page to a {@code MyImageRenderListener}.
 *
 * @param srcPdf    the source PDF
 * @param destImage the destination handed to the render listener
 *                  (NOTE(review): presumably the path of the resulting image; confirm
 *                  against {@code MyImageRenderListener})
 * @throws IOException       if the PDF cannot be read or processed
 * @throws DocumentException declared by the signature
 */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    PdfReader reader = new PdfReader(srcPdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        MyImageRenderListener listener = new MyImageRenderListener(destImage);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            parser.processContent(i, listener);
        }
    } finally {
        // Close the reader even when a page fails to process.
        reader.close();
    }
}
From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java
License:Open Source License
protected ByteArrayOutputStream doStamper(ByteArrayOutputStream baos) throws IOException, DocumentException { map.clear();/*from w w w . ja v a 2 s .c o m*/ ByteArrayOutputStream baosFinal = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(baos.toByteArray()); PdfContentStreamProcessor processor = new PdfContentStreamProcessor(this); for (int i = 1; i <= reader.getNumberOfPages(); i++) { actualPage = i; PdfDictionary pageDic = reader.getPageN(i); PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES); processor.processContent(ContentByteUtils.getContentBytesForPage(reader, i), resourcesDic); } Document newDocument = new Document(PageSize.A4); PdfWriter writer = PdfWriter.getInstance(newDocument, baosFinal); newDocument.open(); PdfContentByte canvas = writer.getDirectContent(); // Font myFont = FontFactory.getFont(FontFactory.COURIER, 7, Font.BOLD); Iterator<Entry<Integer, Map<Float, StringBuffer>>> it = map.entrySet().iterator(); while (it.hasNext()) { Map.Entry<Integer, Map<Float, StringBuffer>> pairs = (Map.Entry<Integer, Map<Float, StringBuffer>>) it .next(); Iterator<Entry<Float, StringBuffer>> iter = pairs.getValue().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<Float, StringBuffer> actualEntry = iter.next(); canvas.beginText(); canvas.setFontAndSize(myFontBase, 7); canvas.showTextAligned(Element.ALIGN_LEFT, actualEntry.getValue().toString(), 25, actualEntry.getKey() + 60, 0); canvas.endText(); } newDocument.newPage(); } newDocument.close(); reader = new PdfReader(baosFinal.toByteArray()); PdfReaderContentParser parser = new PdfReaderContentParser(reader); TextMarginFinder finder; for (int i = 1; i <= reader.getNumberOfPages(); i++) { finder = parser.processContent(i, new TextMarginFinder()); if (finder.getLly() <= 68) { getWindow().showNotification("Hiba", "A(z) " + i + ".ik oldalon tl sok a szveg !!!", Notification.TYPE_ERROR_MESSAGE); return null; } } reader = new PdfReader(baosFinal.toByteArray()); PdfStamper stamper = 
new PdfStamper(reader, baosFinal); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; i++) { PdfContentByte overContent = stamper.getOverContent(i); overContent.addImage(logo, 131, 0, 0, 32, 44, 775); getFooterTable(i, n).writeSelectedRows(0, -1, 27, 68, stamper.getOverContent(i)); // ez a jo getIspLogoTable(i, n).writeSelectedRows(0, -1, 425, 45, stamper.getOverContent(i)); } stamper.close(); reader.close(); return baosFinal; }
From source file:com.cloudhub.util.PDFToText.java
License:Apache License
/**
 * Parses a PDF to a plain text file.
 *
 * <p>The text of each page is extracted with a {@code SimpleTextExtractionStrategy} and
 * written to the destination, one {@code println} per page. The writer is created without
 * an explicit charset, so the platform default encoding is used.</p>
 *
 * @param source      the original PDF
 * @param destination the resulting text file
 * @throws IOException if the PDF cannot be read or the destination cannot be opened
 */
public static void parsePdf(String source, String destination) throws IOException {
    PdfReader reader = new PdfReader(source);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        PrintWriter out = new PrintWriter(new FileOutputStream(destination));
        try {
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            out.flush();
        } finally {
            // Release the output file even if extraction fails part-way through.
            out.close();
        }
    } finally {
        // The reader was previously never closed.
        reader.close();
    }
}
From source file:com.cyberninjas.invoice.pdf.PdfInvoiceEditor.java
License:Open Source License
/** * Parses PDF to determine location of text. * * @throws IOException on I/O error parsing PDF. *///from w w w .j a va 2 s . co m public void parseContent() throws IOException { PdfReader reader = getReader(); PdfReaderContentParser parser = new PdfReaderContentParser(reader); InvoiceTextExtractionStrategy strategy; for (int pageNum = 1; pageNum <= getReader().getNumberOfPages(); pageNum++) { strategy = parser.processContent(pageNum, new InvoiceTextExtractionStrategy()); strategy.parse(settings); for (String itemId : strategy.getCumulativeCostLocationMap().keySet()) { cumulativeCostLocationMap.put(itemId, new PageVector(pageNum, strategy.getCumulativeCostLocationMap().get(itemId))); } //set the location to write the cumulative cost subtotal if (cumulativeCostSubtotalLocation == null && strategy.getCumulativeCostSubtotalLocation() != null) { cumulativeCostSubtotalLocation = new PageVector(pageNum, strategy.getCumulativeCostSubtotalLocation()); } // set the location to write the total funded amount if (totalFundedAmountLocation == null && strategy.getTotalFundedAmountLocation() != null) { totalFundedAmountLocation = new PageVector(pageNum, strategy.getTotalFundedAmountLocation()); } } }
From source file:com.cyberninjas.invoice.pdf.PdfInvoiceEditor.java
License:Open Source License
/** * Write text relative to the matching reference text. * * <p>/*from w ww.ja v a 2 s . com*/ * Note: Due to the way PDF stores text, found blocks of text may contain additional text beyond the reference text. * This can cause blocks to be larger than expected requiring a larger or smaller offset to be set to align * properly.</p> * * @param text the text to write. * @param referenceText the reference text to write relative to. * @param offset the offset to write relative to the reference text. * @param align the alignment. * @param findAll indicates if text should be written at every occurrence or only the first. * @throws IOException on I/O error. */ public final void writeTextAtOffset(String text, String referenceText, float offset, final Alignment align, boolean findAll) throws IOException { PdfReader reader = getReader(); PdfReaderContentParser parser = new PdfReaderContentParser(reader); TextChunkExtractionStrategy strategy; for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { strategy = parser.processContent(pageNum, new TextChunkExtractionStrategy()); if (findAll) { for (TextChunk textChunk : strategy.matchAllText(referenceText)) { this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset, textChunk.getEndLocation().get(Vector.I2)); } } else { TextChunk textChunk = strategy.matchText(referenceText); if (textChunk != null) { this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset, textChunk.getEndLocation().get(Vector.I2)); } } } }
From source file:com.erikHolz.vertretungsplan.Converter.java
License:Open Source License
public void parsePDF() throws IOException { PdfReader reader = new PdfReader(fileDest + ".pdf"); PdfReaderContentParser parser = new PdfReaderContentParser(reader); PrintWriter out = new PrintWriter(new FileOutputStream(fileDest + "__.txt")); TextExtractionStrategy strategy;//w w w . j a va 2s . c om for (int intI = 1; intI <= reader.getNumberOfPages(); intI++) { strategy = parser.processContent(intI, new LocationTextExtractionStrategy()); out.println(strategy.getResultantText()); } out.flush(); out.close(); reader.close(); // lschen der ursprnglichen pdf File f = new File(fileDest + ".pdf"); if (f.exists()) f.delete(); }
From source file:com.github.naofum.epubconverter.ReadPdf.java
License:Open Source License
/**
 * Processes one page of the shared {@code reader} with a {@code renderListener} and returns
 * the shared image list.
 *
 * <p>{@code imageList} is replaced with a new, empty list on every call. An
 * {@code IOException} or {@code OutOfMemoryError} raised while processing the page is
 * reported on {@code System.err} and not propagated; the list is returned as it stands.
 * NOTE(review): presumably the listener is what adds entries to {@code imageList}; confirm
 * against {@code renderListener}.</p>
 *
 * @param page the page number to process
 * @return the current {@code imageList}
 */
public static List<String> getImages(int page) {
    imageList = new ArrayList<String>();

    PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
    renderListener imageListener = new renderListener();

    try {
        contentParser.processContent(page, imageListener);
    } catch (IOException ioFailure) {
        System.err.println("Failed to extract image " + ioFailure.getMessage());
    } catch (OutOfMemoryError memoryFailure) {
        System.err.println("Out of memory in image extraction " + memoryFailure.getMessage());
    }

    return imageList;
}