Example usage for com.itextpdf.text.pdf.parser PdfContentStreamProcessor processContent

List of usage examples for com.itextpdf.text.pdf.parser PdfContentStreamProcessor processContent

Introduction

This page collects usage examples for the processContent method of com.itextpdf.text.pdf.parser.PdfContentStreamProcessor.

Prototype

public void processContent(byte[] contentBytes, PdfDictionary resources) 

Source Link

Document

Processes PDF syntax.

Usage

From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java

License:Open Source License

/**
 * Re-renders the text collected from the given PDF onto A4 pages and stamps every
 * resulting page with the logo image and the footer / ISP-logo tables.
 *
 * <p>The work is done in four passes:
 * <ol>
 *   <li>every page of the input is fed through a {@link PdfContentStreamProcessor} that
 *       reports to {@code this}; the entries later read from {@code map} are presumably
 *       filled in by those listener callbacks (not visible in this method);</li>
 *   <li>each entry of {@code map} becomes one page of a new A4 document, with every inner
 *       entry drawn as one left-aligned text line at x = 25 and y = key + 60;</li>
 *   <li>the new document is checked with a {@link TextMarginFinder}: if the lowest text on
 *       any page lies at or below y = 68, an error notification is shown and the method
 *       gives up;</li>
 *   <li>the new document is stamped with the logo and the two tables.</li>
 * </ol>
 *
 * @param baos stream holding the bytes of the PDF to process
 * @return a stream containing only the stamped PDF, or {@code null} if some page holds
 *         too much text (the user has already been notified in that case)
 * @throws IOException if one of the PDFs cannot be read or written
 * @throws DocumentException if iText fails while building or stamping the document
 */
protected ByteArrayOutputStream doStamper(ByteArrayOutputStream baos) throws IOException, DocumentException {

    map.clear();

    // Pass 1: run every page's content stream through the processor that reports to this object.
    PdfReader sourceReader = new PdfReader(baos.toByteArray());
    try {
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(this);
        int sourcePageCount = sourceReader.getNumberOfPages();
        for (int i = 1; i <= sourcePageCount; i++) {
            actualPage = i;
            PdfDictionary pageDic = sourceReader.getPageN(i);
            PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
            processor.processContent(ContentByteUtils.getContentBytesForPage(sourceReader, i), resourcesDic);
        }
    } finally {
        sourceReader.close();
    }

    // Pass 2: lay the collected lines out on fresh A4 pages, one page per outer map entry.
    ByteArrayOutputStream laidOut = new ByteArrayOutputStream();
    Document newDocument = new Document(PageSize.A4);
    PdfWriter writer = PdfWriter.getInstance(newDocument, laidOut);

    newDocument.open();
    PdfContentByte canvas = writer.getDirectContent();
    for (Map.Entry<Integer, Map<Float, StringBuffer>> pageEntry : map.entrySet()) {
        for (Map.Entry<Float, StringBuffer> lineEntry : pageEntry.getValue().entrySet()) {
            canvas.beginText();
            canvas.setFontAndSize(myFontBase, 7);
            canvas.showTextAligned(Element.ALIGN_LEFT, lineEntry.getValue().toString(), 25,
                    lineEntry.getKey() + 60, 0);
            canvas.endText();
        }
        newDocument.newPage();
    }
    newDocument.close();

    byte[] laidOutBytes = laidOut.toByteArray();

    // Pass 3: refuse to continue when text reaches down to y <= 68, the y coordinate at
    // which the footer table is written in pass 4.
    PdfReader checkReader = new PdfReader(laidOutBytes);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(checkReader);
        int checkedPageCount = checkReader.getNumberOfPages();
        for (int i = 1; i <= checkedPageCount; i++) {
            TextMarginFinder finder = parser.processContent(i, new TextMarginFinder());
            if (finder.getLly() <= 68) {
                // NOTE(review): the message text looks like it lost its accented characters
                // somewhere along the way - confirm against the original source before changing it.
                getWindow().showNotification("Hiba", "A(z) " + i + ".ik oldalon tl sok a szveg !!!",
                        Notification.TYPE_ERROR_MESSAGE);
                return null;
            }
        }
    } finally {
        checkReader.close();
    }

    // Pass 4: stamp into a fresh stream. Writing into the stream that already holds the
    // un-stamped document would append a second PDF after the first one, and the stamper
    // counts its cross-reference offsets from the first byte it writes itself.
    ByteArrayOutputStream baosFinal = new ByteArrayOutputStream();
    PdfReader stampReader = new PdfReader(laidOutBytes);
    try {
        PdfStamper stamper = new PdfStamper(stampReader, baosFinal);
        int n = stampReader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            PdfContentByte overContent = stamper.getOverContent(i);
            overContent.addImage(logo, 131, 0, 0, 32, 44, 775);
            getFooterTable(i, n).writeSelectedRows(0, -1, 27, 68, overContent);
            getIspLogoTable(i, n).writeSelectedRows(0, -1, 425, 45, overContent);
        }
        stamper.close();
    } finally {
        stampReader.close();
    }
    return baosFinal;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Rewrites the content of one page through the clean-up render listener and then
 * paints the cleaned locations.
 *
 * <p>The page's original content bytes are captured, the page's {@code /Contents} entry is
 * removed, and the captured bytes are replayed through a {@link PdfContentStreamProcessor}
 * whose operators are supplied by {@code PdfCleanUpContentOperator}; the listener is
 * registered with the stamper's under-content canvas for that page. Nothing happens when
 * no locations are given.
 *
 * @param pageNum number of the page to clean
 * @param cleanUpLocations locations to clean on that page
 * @throws IOException propagated from the content-processing calls
 * @throws DocumentException propagated from the content-processing calls
 */
private void cleanUpPage(int pageNum, List<PdfCleanUpLocation> cleanUpLocations)
        throws IOException, DocumentException {
    if (cleanUpLocations.isEmpty()) {
        return;
    }

    final PdfReader reader = pdfStamper.getReader();
    final PdfDictionary pageDict = reader.getPageN(pageNum);
    final PdfContentByte underContent = pdfStamper.getUnderContent(pageNum);

    // Grab the original content before detaching it from the page dictionary.
    final byte[] originalContent = ContentByteUtils.getContentBytesForPage(reader, pageNum);
    pageDict.remove(PdfName.CONTENTS);

    underContent.saveState();

    final PdfCleanUpRenderListener listener =
            new PdfCleanUpRenderListener(pdfStamper, createFilter(cleanUpLocations));
    listener.registerNewContext(reader.getPageResources(pageDict), underContent);

    final PdfContentStreamProcessor streamProcessor = new PdfContentStreamProcessor(listener);
    PdfCleanUpContentOperator.populateOperators(streamProcessor, listener);
    final PdfDictionary pageResources = pageDict.getAsDict(PdfName.RESOURCES);
    streamProcessor.processContent(originalContent, pageResources);
    listener.popContext();

    underContent.restoreState();

    colorCleanedLocations(underContent, cleanUpLocations);

    // A non-null reference collection means we are in "extract locations from redact annots" mode.
    if (redactAnnotIndirRefs != null) {
        deleteRedactAnnots(pageNum);
    }
}

From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java

License:Open Source License

/**
 * Extracts text chunks from PDF using iText and stores them in BxDocument object.
 * Depending on parsed PDF, extracted text chunks may or may not be individual glyphs,
 * they correspond to single string operands of PDF's text-showing operators
 * (Tj, TJ, ' and ").
 *
 * <p>When both {@code frontPagesLimit} and {@code backPagesLimit} are positive, pages whose
 * number is greater than {@code frontPagesLimit} and less than
 * {@code numberOfPages - 1 - backPagesLimit} are skipped.
 *
 * @param stream PDF's stream
 * @return BxDocument containing pages with extracted chunks stored as BxChunk lists
 * @throws AnalysisException if the PDF is invalid, cannot be read, or yields no pages
 */
@Override
public BxDocument extractCharacters(InputStream stream) throws AnalysisException {
    PdfReader reader = null;
    try {
        BxDocumentCreator documentCreator = new BxDocumentCreator();

        reader = new PdfReader(stream);
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(documentCreator);

        final int pageCount = reader.getNumberOfPages();
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
            // NOTE(review): this bound keeps backPagesLimit + 2 trailing pages rather than
            // backPagesLimit - confirm whether that is intended before changing it.
            if (frontPagesLimit > 0 && backPagesLimit > 0 && pageNumber > frontPagesLimit
                    && pageNumber < pageCount - 1 - backPagesLimit) {
                continue;
            }
            documentCreator.processNewBxPage(reader.getPageSize(pageNumber));

            PdfDictionary resources = reader.getPageN(pageNumber).getAsDict(PdfName.RESOURCES);
            processAlternativeFontNames(resources);
            processAlternativeColorSpace(resources);

            // Start every page from a clean processor state.
            processor.reset();
            processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNumber), resources);
            // Presumably aborts the extraction once a registered timeout has elapsed - verify in TimeoutRegister.
            TimeoutRegister.get().check();
        }

        BxDocument doc = filterComponents(removeDuplicateChunks(documentCreator.document));
        if (doc.getFirstChild() == null) {
            throw new AnalysisException("Document contains no pages");
        }
        return doc;
    } catch (InvalidPdfException ex) {
        throw new AnalysisException("Invalid PDF file", ex);
    } catch (IOException ex) {
        throw new AnalysisException("Cannot extract characters from PDF file", ex);
    } finally {
        // Release the reader on every exit path; it was never closed before.
        if (reader != null) {
            reader.close();
        }
    }
}