Example usage for the com.itextpdf.text.pdf.parser.PdfReaderContentParser constructor PdfReaderContentParser(PdfReader)

List of usage examples for the com.itextpdf.text.pdf.parser.PdfReaderContentParser constructor

Introduction

This page collects usage examples for the PdfReaderContentParser(PdfReader) constructor of com.itextpdf.text.pdf.parser.PdfReaderContentParser.

Prototype

public PdfReaderContentParser(PdfReader reader) 

Source Link

Usage

From source file:be.roots.taconic.pricingguide.util.iTextUtil.java

License:Open Source License

/**
 * Builds an {@code Image} from the content of the first page of a PDF.
 *
 * <p>Page 1 is run through an {@code ImageRenderListener} that is handed an in-memory buffer;
 * the bytes found in that buffer afterwards are decoded with {@code Image.getInstance}.
 * (Presumably the listener copies the page's image data into the buffer - confirm against
 * {@code ImageRenderListener}.)</p>
 *
 * @param pdf the raw bytes of the PDF document
 * @return the image decoded from the buffered bytes
 * @throws IOException if the PDF cannot be read or parsed, or the image bytes cannot be read
 * @throws BadElementException if the buffered bytes cannot be turned into an image
 */
public static Image getImageFromPdf(byte[] pdf) throws IOException, BadElementException {

    try (final ByteArrayOutputStream bos = new ByteArrayOutputStream()) {

        final PdfReader reader = new PdfReader(pdf);
        try {
            final PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            final ImageRenderListener listener = new ImageRenderListener(bos);

            parser.processContent(1, listener);
        } finally {
            // Release the reader even when parsing the page fails.
            reader.close();
        }

        return Image.getInstance(bos.toByteArray());
    }

}

From source file:br.com.smarttaco.util.HelenaBarbosa.java

/**
 * Extracts the text of a PDF and writes it, page after page, to a UTF-8 text file.
 *
 * @param pdf path of the PDF to read
 * @param paginas pages to keep before extraction; if <code>null</code> the whole document is read.
 * @param txt path of the text file to write
 * @throws FileNotFoundException if the output file cannot be created
 * @throws IOException if the PDF cannot be read or parsed
 */
private static void pdf2txt(final String pdf, List<Integer> paginas, final String txt)
        throws FileNotFoundException, IOException {
    final PdfReader reader = new PdfReader(pdf);
    try {
        if (paginas != null) {
            // Restrict the reader to the requested pages; the loop below then sees only those.
            reader.selectPages(paginas);
        }
        final PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        // try-with-resources flushes and closes the writer on every exit path.
        try (PrintWriter out = new PrintWriter(txt, "UTF-8")) {
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                final TextExtractionStrategy strategy = parser.processContent(i,
                        new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
        }
    } finally {
        // Release the reader even when page selection, parsing or writing fails.
        reader.close();
    }
}

From source file:ch.kostceco.tools.kostsimy.comparison.modulepdfa.impl.PdfExtractJpegModuleImpl.java

License:Open Source License

/** Parses a PDF and passes every page to an image render listener.
 *
 * The four image counters are reset to zero before parsing. An {@code IOException} raised
 * while reading or parsing is logged through the message service and not rethrown.
 *
 * @param srcPdf
 *          the source PDF
 * @param destImage
 *          the destination handed to {@code MyImageRenderListener} (presumably where the
 *          extracted images are written - confirm against the listener)
 * @throws IOException
 *           declared for callers; I/O failures inside the method are logged instead
 * @throws DocumentException
 *           declared for callers */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    jpegCounter = 0;
    ccittCounter = 0;
    jp2Counter = 0;
    jbig2Counter = 0;
    PdfReader reader = null;
    try {
        reader = new PdfReader(srcPdf);
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        MyImageRenderListener listener = new MyImageRenderListener(destImage);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            parser.processContent(i, listener);
        }
    } catch (IOException e) {
        getMessageService().logError(getTextResourceService().getText(MESSAGE_XML_MODUL_PDF_EXTRACT)
                + getTextResourceService().getText(ERROR_XML_UNKNOWN, e.getMessage()));
    } finally {
        // Close the reader on every path; it stays null when opening the PDF itself failed.
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:ch.kostceco.tools.kostval.validation.modulepdfa.impl.ValidationJimageValidationModuleImpl.java

License:Open Source License

/** Parses a PDF and passes every page to an image render listener.
 *
 * @param srcPdf
 *          the source PDF
 * @param destImage
 *          the destination handed to {@code MyImageRenderListener} (presumably where the
 *          extracted images are written - confirm against the listener)
 * @throws IOException
 *           if the PDF cannot be read or parsed
 * @throws DocumentException
 *           declared for callers */
public void extractImages(String srcPdf, String destImage) throws IOException, DocumentException {
    PdfReader reader = new PdfReader(srcPdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        MyImageRenderListener listener = new MyImageRenderListener(destImage);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            parser.processContent(i, listener);
        }
    } finally {
        // Release the reader even when parsing a page fails.
        reader.close();
    }
}

From source file:com.cib.statementstamper.windows.StatementStamperMainWindow.java

License:Open Source License

/**
 * Rebuilds the given PDF as a freshly laid-out A4 document and stamps logo and footer on it.
 *
 * <p>Steps:</p>
 * <ol>
 * <li>Every page of the input is fed through a {@code PdfContentStreamProcessor} that uses this
 * object as its listener; {@code actualPage} is set before each page. (Presumably the listener
 * callbacks fill {@code map} with the text per page and vertical position - confirm.)</li>
 * <li>A new document is written with one page per entry of {@code map}; each inner entry is
 * drawn left-aligned at x = 25 and y = key + 60.</li>
 * <li>The new document is measured with a {@code TextMarginFinder}; if the text on any page
 * reaches down to y = 68 or lower, an error notification is shown and {@code null} is
 * returned. (The footer table is written at y = 68 in the next step.)</li>
 * <li>The logo, footer table and ISP logo table are stamped over every page.</li>
 * </ol>
 *
 * @param baos buffer holding the PDF to process
 * @return a buffer holding the stamped PDF, or {@code null} when a page holds too much text
 * @throws IOException if a PDF cannot be read, parsed or written
 * @throws DocumentException if the new document or the stamped content cannot be produced
 */
protected ByteArrayOutputStream doStamper(ByteArrayOutputStream baos) throws IOException, DocumentException {

    map.clear();

    // Pass 1: run the input pages through the content-stream processor.
    PdfReader sourceReader = new PdfReader(baos.toByteArray());
    try {
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(this);
        for (int i = 1; i <= sourceReader.getNumberOfPages(); i++) {
            actualPage = i;
            PdfDictionary pageDic = sourceReader.getPageN(i);
            PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
            processor.processContent(ContentByteUtils.getContentBytesForPage(sourceReader, i), resourcesDic);
        }
    } finally {
        sourceReader.close();
    }

    // Pass 2: write the collected text into a new A4 document, one page per map entry.
    ByteArrayOutputStream baosFinal = new ByteArrayOutputStream();
    Document newDocument = new Document(PageSize.A4);
    PdfWriter writer = PdfWriter.getInstance(newDocument, baosFinal);

    newDocument.open();
    PdfContentByte canvas = writer.getDirectContent();
    for (Map<Float, StringBuffer> linesOnPage : map.values()) {
        for (Map.Entry<Float, StringBuffer> line : linesOnPage.entrySet()) {
            canvas.beginText();
            canvas.setFontAndSize(myFontBase, 7);
            canvas.showTextAligned(Element.ALIGN_LEFT, line.getValue().toString(), 25,
                    line.getKey() + 60, 0);
            canvas.endText();
        }
        newDocument.newPage();
    }
    newDocument.close();

    // Snapshot of the laid-out document; both later passes read from this copy.
    byte[] laidOutPdf = baosFinal.toByteArray();

    // Pass 3: reject the document when text on any page reaches y <= 68.
    PdfReader marginReader = new PdfReader(laidOutPdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(marginReader);
        for (int i = 1; i <= marginReader.getNumberOfPages(); i++) {
            TextMarginFinder finder = parser.processContent(i, new TextMarginFinder());
            if (finder.getLly() <= 68) {
                getWindow().showNotification("Hiba", "A(z) " + i + ".ik oldalon tl sok a szveg !!!",
                        Notification.TYPE_ERROR_MESSAGE);
                return null;
            }
        }
    } finally {
        marginReader.close();
    }

    // Pass 4: stamp logo and footer tables. The buffer still holds the unstamped document, so it
    // is emptied first; otherwise the stamper would append a second PDF after the first one.
    baosFinal.reset();
    PdfReader stampReader = new PdfReader(laidOutPdf);
    try {
        PdfStamper stamper = new PdfStamper(stampReader, baosFinal);
        int n = stampReader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            PdfContentByte overContent = stamper.getOverContent(i);
            overContent.addImage(logo, 131, 0, 0, 32, 44, 775);
            getFooterTable(i, n).writeSelectedRows(0, -1, 27, 68, stamper.getOverContent(i)); // this is the right one
            getIspLogoTable(i, n).writeSelectedRows(0, -1, 425, 45, stamper.getOverContent(i));
        }
        stamper.close();
    } finally {
        stampReader.close();
    }
    return baosFinal;
}

From source file:com.cloudhub.util.PDFToText.java

License:Apache License

/**
 * Parses a PDF to a plain text file, writing the text of each page in page order.
 *
 * <p>The output is written with the platform default charset, because the
 * {@code PrintWriter} is created without an explicit encoding.</p>
 *
 * @param source the original PDF
 * @param destination the resulting text
 * @throws IOException if the PDF cannot be read or parsed, or the text file cannot be created
 */
public static void parsePdf(String source, String destination) throws IOException {
    PdfReader reader = new PdfReader(source);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        // try-with-resources flushes and closes the writer on every exit path.
        try (PrintWriter out = new PrintWriter(new FileOutputStream(destination))) {
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                TextExtractionStrategy strategy = parser.processContent(i,
                        new SimpleTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
        }
    } finally {
        // The reader was never closed before; release it on every path.
        reader.close();
    }
}

From source file:com.cyberninjas.invoice.pdf.PdfInvoiceEditor.java

License:Open Source License

/**
 * Walks through every page of the PDF and records where invoice fields are located.
 *
 * <p>For each page an {@code InvoiceTextExtractionStrategy} is run and then parsed with
 * {@code settings}. Every cumulative cost location it reports is stored under its item id
 * together with the page number. The cumulative cost subtotal location and the total funded
 * amount location are each stored only once, from the first page that reports them.</p>
 *
 * @throws IOException on I/O error parsing PDF.
 */
public void parseContent() throws IOException {
    final PdfReaderContentParser contentParser = new PdfReaderContentParser(getReader());

    for (int page = 1; page <= getReader().getNumberOfPages(); page++) {
        final InvoiceTextExtractionStrategy pageStrategy = contentParser.processContent(page,
                new InvoiceTextExtractionStrategy());
        pageStrategy.parse(settings);

        // Remember on which page, and where, each item's cumulative cost is found.
        for (String id : pageStrategy.getCumulativeCostLocationMap().keySet()) {
            cumulativeCostLocationMap.put(id,
                    new PageVector(page, pageStrategy.getCumulativeCostLocationMap().get(id)));
        }

        // Location to write the cumulative cost subtotal: first page that reports one wins.
        if (cumulativeCostSubtotalLocation == null) {
            if (pageStrategy.getCumulativeCostSubtotalLocation() != null) {
                cumulativeCostSubtotalLocation = new PageVector(page,
                        pageStrategy.getCumulativeCostSubtotalLocation());
            }
        }

        // Location to write the total funded amount: first page that reports one wins.
        if (totalFundedAmountLocation == null) {
            if (pageStrategy.getTotalFundedAmountLocation() != null) {
                totalFundedAmountLocation = new PageVector(page,
                        pageStrategy.getTotalFundedAmountLocation());
            }
        }
    }
}

From source file:com.cyberninjas.invoice.pdf.PdfInvoiceEditor.java

License:Open Source License

/**
 * Write text relative to the matching reference text.
 *
 * <p>/*w  w w .ja v  a 2s.co m*/
 * Note: Due to the way PDF stores text, found blocks of text may contain additional text beyond the reference text.
 * This can cause blocks to be larger than expected requiring a larger or smaller offset to be set to align
 * properly.</p>
 *
 * @param text the text to write.
 * @param referenceText the reference text to write relative to.
 * @param offset the offset to write relative to the reference text.
 * @param align the alignment.
 * @param findAll indicates if text should be written at every occurrence or only the first.
 * @throws IOException on I/O error.
 */
public final void writeTextAtOffset(String text, String referenceText, float offset, final Alignment align,
        boolean findAll) throws IOException {
    PdfReader reader = getReader();

    PdfReaderContentParser parser = new PdfReaderContentParser(reader);

    TextChunkExtractionStrategy strategy;

    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        strategy = parser.processContent(pageNum, new TextChunkExtractionStrategy());

        if (findAll) {
            for (TextChunk textChunk : strategy.matchAllText(referenceText)) {
                this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset,
                        textChunk.getEndLocation().get(Vector.I2));
            }
        } else {
            TextChunk textChunk = strategy.matchText(referenceText);

            if (textChunk != null) {
                this.writeText(pageNum, text, align, textChunk.getEndLocation().get(Vector.I1) + offset,
                        textChunk.getEndLocation().get(Vector.I2));
            }
        }
    }
}

From source file:com.erikHolz.vertretungsplan.Converter.java

License:Open Source License

/**
 * Converts {@code fileDest + ".pdf"} to the text file {@code fileDest + "__.txt"} and then
 * deletes the PDF.
 *
 * <p>Each page is extracted with a {@code LocationTextExtractionStrategy} and written in page
 * order. The PDF is deleted only after the conversion finished without an exception.</p>
 *
 * @throws IOException if the PDF cannot be read or parsed, or the text file cannot be created
 */
public void parsePDF() throws IOException {

    PdfReader reader = new PdfReader(fileDest + ".pdf");
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        // try-with-resources flushes and closes the writer on every exit path.
        try (PrintWriter out = new PrintWriter(new FileOutputStream(fileDest + "__.txt"))) {
            for (int intI = 1; intI <= reader.getNumberOfPages(); intI++) {
                TextExtractionStrategy strategy = parser.processContent(intI,
                        new LocationTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
        }
    } finally {
        // Release the reader even when parsing or writing fails.
        reader.close();
    }

    // delete the original PDF (the reader is already closed at this point)
    File f = new File(fileDest + ".pdf");
    if (f.exists()) {
        f.delete();
    }
}

From source file:com.github.naofum.epubconverter.ReadPdf.java

License:Open Source License

/**
 * Runs one page of the shared {@code reader} through a {@code renderListener} and returns
 * the shared {@code imageList}.
 *
 * <p>{@code imageList} is replaced by a new empty list on every call. (Presumably the
 * listener adds the extracted images to it - confirm against {@code renderListener}.)
 * An {@code IOException} or {@code OutOfMemoryError} during processing is reported on
 * {@code System.err}, and the list is returned with whatever it holds at that point.</p>
 *
 * @param page the page number passed to the content parser
 * @return the list referenced by {@code imageList} after processing
 */
public static List<String> getImages(int page) {
    imageList = new ArrayList<String>();
    final PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
    final renderListener imageListener = new renderListener();
    try {
        contentParser.processContent(page, imageListener);
    } catch (IOException ioFailure) {
        System.err.println("Failed to extract image " + ioFailure.getMessage());
    } catch (OutOfMemoryError memoryFailure) {
        System.err.println("Out of memory in image extraction " + memoryFailure.getMessage());
    }
    return imageList;
}