List of usage examples for com.itextpdf.text.pdf.parser.ContentByteUtils#getContentBytesForPage
public static byte[] getContentBytesForPage(PdfReader reader, int pageNum) throws IOException
From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java
License:Open Source License
protected ByteArrayOutputStream doStamper(ByteArrayOutputStream baos) throws IOException, DocumentException { map.clear();//from w w w . j a va 2s .c o m ByteArrayOutputStream baosFinal = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(baos.toByteArray()); PdfContentStreamProcessor processor = new PdfContentStreamProcessor(this); for (int i = 1; i <= reader.getNumberOfPages(); i++) { actualPage = i; PdfDictionary pageDic = reader.getPageN(i); PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES); processor.processContent(ContentByteUtils.getContentBytesForPage(reader, i), resourcesDic); } Document newDocument = new Document(PageSize.A4); PdfWriter writer = PdfWriter.getInstance(newDocument, baosFinal); newDocument.open(); PdfContentByte canvas = writer.getDirectContent(); // Font myFont = FontFactory.getFont(FontFactory.COURIER, 7, Font.BOLD); Iterator<Entry<Integer, Map<Float, StringBuffer>>> it = map.entrySet().iterator(); while (it.hasNext()) { Map.Entry<Integer, Map<Float, StringBuffer>> pairs = (Map.Entry<Integer, Map<Float, StringBuffer>>) it .next(); Iterator<Entry<Float, StringBuffer>> iter = pairs.getValue().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<Float, StringBuffer> actualEntry = iter.next(); canvas.beginText(); canvas.setFontAndSize(myFontBase, 7); canvas.showTextAligned(Element.ALIGN_LEFT, actualEntry.getValue().toString(), 25, actualEntry.getKey() + 60, 0); canvas.endText(); } newDocument.newPage(); } newDocument.close(); reader = new PdfReader(baosFinal.toByteArray()); PdfReaderContentParser parser = new PdfReaderContentParser(reader); TextMarginFinder finder; for (int i = 1; i <= reader.getNumberOfPages(); i++) { finder = parser.processContent(i, new TextMarginFinder()); if (finder.getLly() <= 68) { getWindow().showNotification("Hiba", "A(z) " + i + ".ik oldalon tl sok a szveg !!!", Notification.TYPE_ERROR_MESSAGE); return null; } } reader = new PdfReader(baosFinal.toByteArray()); PdfStamper stamper = new 
PdfStamper(reader, baosFinal); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; i++) { PdfContentByte overContent = stamper.getOverContent(i); overContent.addImage(logo, 131, 0, 0, 32, 44, 775); getFooterTable(i, n).writeSelectedRows(0, -1, 27, 68, stamper.getOverContent(i)); // ez a jo getIspLogoTable(i, n).writeSelectedRows(0, -1, 425, 45, stamper.getOverContent(i)); } stamper.close(); reader.close(); return baosFinal; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
private void cleanUpPage(int pageNum, List<PdfCleanUpLocation> cleanUpLocations) throws IOException, DocumentException { if (cleanUpLocations.size() == 0) { return;//from w w w .j av a 2 s .c o m } PdfReader pdfReader = pdfStamper.getReader(); PdfDictionary page = pdfReader.getPageN(pageNum); PdfContentByte canvas = pdfStamper.getUnderContent(pageNum); byte[] pageContentInput = ContentByteUtils.getContentBytesForPage(pdfReader, pageNum); page.remove(PdfName.CONTENTS); canvas.saveState(); PdfCleanUpRegionFilter filter = createFilter(cleanUpLocations); PdfCleanUpRenderListener pdfCleanUpRenderListener = new PdfCleanUpRenderListener(pdfStamper, filter); pdfCleanUpRenderListener.registerNewContext(pdfReader.getPageResources(page), canvas); PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(pdfCleanUpRenderListener); PdfCleanUpContentOperator.populateOperators(contentProcessor, pdfCleanUpRenderListener); contentProcessor.processContent(pageContentInput, page.getAsDict(PdfName.RESOURCES)); pdfCleanUpRenderListener.popContext(); canvas.restoreState(); colorCleanedLocations(canvas, cleanUpLocations); if (redactAnnotIndirRefs != null) { // if it isn't null, then we are in "extract locations from redact annots" mode deleteRedactAnnots(pageNum); } }
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/**
 * Extracts text chunks from PDF using iText and stores them in BxDocument object.
 * Depending on parsed PDF, extracted text chunks may or may not be individual glyphs,
 * they correspond to single string operands of PDF's text-showing operators
 * (Tj, TJ, ' and ").
 *
 * <p>The {@link PdfReader} opened on the stream is closed before this method returns,
 * whether it completes normally or throws. The passed stream itself is not closed here.
 *
 * @param stream PDF's stream
 * @return BxDocument containing pages with extracted chunks stored as BxChunk lists
 * @throws AnalysisException if the PDF is invalid, cannot be read, or the resulting
 *         document contains no pages
 */
@Override
public BxDocument extractCharacters(InputStream stream) throws AnalysisException {
    try {
        BxDocumentCreator documentCreator = new BxDocumentCreator();

        PdfReader reader = new PdfReader(stream);
        try {
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(documentCreator);
            final int pageCount = reader.getNumberOfPages();

            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
                // When both limits are set, pages strictly between the two bounds are skipped.
                // NOTE(review): with 1-based page numbers the upper bound
                // (pageCount - 1 - backPagesLimit) keeps more trailing pages than
                // backPagesLimit alone would suggest - confirm this is intended.
                if (frontPagesLimit > 0 && backPagesLimit > 0 && pageNumber > frontPagesLimit
                        && pageNumber < pageCount - 1 - backPagesLimit) {
                    continue;
                }

                documentCreator.processNewBxPage(reader.getPageSize(pageNumber));

                PdfDictionary resources = reader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES);
                processAlternativeFontNames(resources);
                processAlternativeColorSpace(resources);

                processor.reset();
                processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNumber), resources);

                TimeoutRegister.get().check();
            }
        } finally {
            // Release the reader on every exit path, including timeouts and parse failures.
            reader.close();
        }

        BxDocument doc = filterComponents(removeDuplicateChunks(documentCreator.document));
        if (doc.getFirstChild() == null) {
            throw new AnalysisException("Document contains no pages");
        }
        return doc;
    } catch (InvalidPdfException ex) {
        throw new AnalysisException("Invalid PDF file", ex);
    } catch (IOException ex) {
        throw new AnalysisException("Cannot extract characters from PDF file", ex);
    }
}