Example usage for com.lowagie.text.pdf PdfReader getPageContent

Introduction

On this page you can find example usages of com.lowagie.text.pdf PdfReader getPageContent.

Prototype

public byte[] getPageContent(int pageNum) throws IOException

Source Link

Document

Gets the contents of the page.

Usage

From source file:net.sf.jsignpdf.UncompressPdf.java

License:Mozilla Public License

/**
 * Command-line entry point: for every PDF file named on the command line,
 * writes a copy whose page content streams have been re-set from their
 * decoded bytes, with {@code Document.compress} switched off.
 * <p>
 * The output file name is the input name with {@code _uncompressed.pdf}
 * replacing a trailing {@code .pdf} (case-insensitive), or appended when the
 * input has no such extension. A failure on one file is printed and does not
 * stop processing of the remaining files.
 *
 * @param args
 *            paths of the PDF files to process; when {@code null} or empty a
 *            usage message is printed and nothing else happens
 */
public static void main(String[] args) {
    if (args == null || args.length == 0) {
        System.out.println("Usage:\njava " + UncompressPdf.class.getName() + " file.pdf [file2.pdf [...]]");
        return;
    }
    Document.compress = false;
    for (String tmpFile : args) {
        String newFileName = null;
        if (tmpFile.toLowerCase().endsWith(".pdf")) {
            newFileName = tmpFile.substring(0, tmpFile.length() - 4) + "_uncompressed.pdf";
        } else {
            newFileName = tmpFile + "_uncompressed.pdf";
        }
        System.out.println("Uncompressing " + tmpFile + " to " + newFileName);
        PdfReader reader = null;
        FileOutputStream out = null;
        try {
            // Open the input first so that no output file is created for an unreadable input.
            reader = new PdfReader(tmpFile);
            out = new FileOutputStream(newFileName);
            PdfStamper stamper = new PdfStamper(reader, out, '\0');
            int total = reader.getNumberOfPages() + 1;
            // PDF page numbers are 1-based.
            for (int i = 1; i < total; i++) {
                reader.setPageContent(i, reader.getPageContent(i));
            }
            stamper.close();
        } catch (NullPointerException npe) {
            npe.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the file handles even when processing failed part-way, so
            // that one bad input does not leak descriptors for the rest of the run.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // Best-effort cleanup; the primary failure (if any) was already reported.
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // Best-effort cleanup; nothing useful can be done here.
                }
            }
        }
    }
}

From source file:org.mnsoft.pdfocr.Wrapper.java

License:Open Source License

/**
 * Writes {@code newFile} as a copy of the foreground PDF, optionally placing
 * each page of the background PDF underneath the matching foreground page,
 * and sets the given document-info entries on the result.
 * <p>
 * Background pages are only added when the {@code mergefiles} attribute equals
 * {@code "true"} and the background page's content is more than
 * {@code THRESHOLD} times the size of the foreground page's content
 * (default threshold 2). Any exception is printed and swallowed; the input
 * readers and the output stream are always released.
 *
 * @param foreground the PDF whose pages form the visible layer (the original image)
 * @param background the PDF whose pages are placed underneath (the OCR'd text)
 * @param newFile    the file to write the merged document to
 * @param title      value for the "Title" info entry, or {@code null} to leave it untouched
 * @param subject    value for the "Subject" info entry, or {@code null} to leave it untouched
 * @param keywords   value for the "Keywords" info entry, or {@code null} to leave it untouched
 * @param author     value for the "Author" info entry, or {@code null} to leave it untouched
 * @param creator    value for the "Creator" info entry, or {@code null} to leave it untouched
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void mergePDFs(File foreground, File background, File newFile, String title, String subject,
        String keywords, String author, String creator) {
    log.debug("Merge " + foreground + " (FG) and " + background + " (BG) to " + newFile);

    final double threshold = ((Integer) StringUtility.StringToInteger(getAttribute("THRESHOLD"), 2))
            .doubleValue();

    PdfReader fg = null;
    PdfReader bg = null;
    FileOutputStream out = null;

    try {
        /*
         * Foreground: Original Image.
         * Background: OCR'd Text
         */
        fg = new PdfReader(foreground.getAbsolutePath());
        bg = new PdfReader(background.getAbsolutePath());

        /*
         * Count pages for foreground and background
         */
        final int fg_num_pages = fg.getNumberOfPages();
        final int bg_num_pages = bg.getNumberOfPages();

        if (fg_num_pages != bg_num_pages) {
            log.error(
                    "! Foreground and background have different number of pages. This should really not happen.");
        }

        /*
         *  The output document. The stream is kept in a local so that it can
         *  be closed in the finally block if anything below fails.
         */
        out = new FileOutputStream(newFile);
        final PdfStamper fg_writer = new PdfStamper(fg, out);

        /*
         * For each page, create a PdfTemplate from the background page
         * (PdfImportedPage is derived from PdfTemplate) and put it under
         * the foreground page. PDF page numbers are 1-based.
         */
        PdfImportedPage bg_page = null;
        for (int i = 1; i <= fg_num_pages; i++) {
            System.out.print(" [" + i + "]");

            /*
             * If we're not explicitly merging, we're merging
             * the document with itself only anyway. Checked before the page
             * contents are read so that they are not decoded for nothing.
             */
            if (!"true".equals(getAttribute("mergefiles"))) {
                continue;
            }

            final int fg_size = fg.getPageContent(i).length;
            final int bg_size = bg.getPageContent(i).length;

            /*
             * Modification 20130904
             *
             * We want to scan only what's not been generated by a number of
             * generators. So, until now, the generator of whom we wanted to
             * ignore files was ocr, i.e. the one we set ourselves. Now, we
             * have seen that when we run an OCR on a "pdf+text" file, as we
             * collate in post the file with its image, we get an overlapping
             * text which is not pixel correct, i.e. which makes the PDF appear
             * not nicely.
             *
             * If the background content is not at least threshold times as large as
             * the foreground content, we assume we've been working on a
             * page that was plain text already, and don't add the background.
             *
             * An empty foreground content stream (fg_size == 0) used to raise
             * an ArithmeticException here and abort the whole merge; such a
             * page cannot already carry text, so the background is added.
             *
             * NOTE(review): bg_size / fg_size is an integer division, so the
             * ratio is truncated before the comparison - confirm this is intended.
             */
            if (fg_size != 0 && (bg_size / fg_size) <= threshold) {
                log.debug("! Not adding background for page " + i + " since background size (" + bg_size
                        + ") not different enough from foreground size (" + fg_size + ").");

                continue;
            }

            bg_page = fg_writer.getImportedPage(bg, i);

            final PdfContentByte contentByte = fg_writer.getUnderContent(i);

            contentByte.addTemplate(bg_page, 0, 0);
        }

        HashMap map = fg_writer.getMoreInfo();
        if (map == null) {
            map = new HashMap();
        }

        if (title != null) {
            map.put("Title", title);
        }

        if (subject != null) {
            map.put("Subject", subject);
        }

        if (keywords != null) {
            map.put("Keywords", keywords);
        }

        if (author != null) {
            map.put("Author", author);
        }

        if (creator != null) {
            map.put("Creator", creator);
        }

        fg_writer.setMoreInfo(map);

        fg_writer.close();

        System.out.println("");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release file handles on both the success and the failure path.
        if (out != null) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Best-effort cleanup; the primary failure (if any) was already reported.
            }
        }
        if (fg != null) {
            try {
                fg.close();
            } catch (Exception ignored) {
                // Best-effort cleanup.
            }
        }
        if (bg != null) {
            try {
                bg.close();
            } catch (Exception ignored) {
                // Best-effort cleanup.
            }
        }
    }
}

From source file:org.signserver.client.cli.performance.PerformanceTestPDFServlet.java

License:Open Source License

/**
 * Sends one of the prepared test PDFs to the server as a raw HTTP/1.0
 * multipart POST and checks that the first page of the returned document
 * contains {@code PDF_CONTENT}.
 * <p>
 * The PDF to send is chosen from {@code pdfs} by how far the test has
 * progressed through {@code runTime} since the first invocation.
 *
 * @param threadId not used by this implementation
 * @return {@code true} if a PDF containing the expected content came back,
 *         {@code false} on an I/O error or unexpected response content
 * @see org.signserver.client.PerformanceTestTask
 */
public boolean invoke(int threadId) {
    if (startTime == 0) {
        startTime = System.currentTimeMillis();
    }
    int pdfIndex = (int) ((System.currentTimeMillis() - startTime) * ((long) pdfs.size()) / runTime);
    // Once the elapsed time reaches runTime the computed index equals (or
    // exceeds) pdfs.size(); keep using the last PDF instead of failing.
    if (pdfIndex >= pdfs.size()) {
        pdfIndex = pdfs.size() - 1;
    }
    byte[] testPDF = pdfs.get(pdfIndex);
    Socket socket = null;
    PdfReader pdfReader = null;
    try {
        URL target = new URL(baseURLString);
        InetAddress addr = InetAddress.getByName(target.getHost());
        // getPort() is -1 when the URL carries no explicit port; fall back to
        // the protocol's default so that Socket does not reject the value.
        int port = target.getPort();
        if (port < 0) {
            port = target.getDefaultPort();
        }
        socket = new Socket(addr, port);
        OutputStream raw = socket.getOutputStream();
        // NOTE(review): the length is computed from String.length() while the bytes
        // written come from getBytes(); these agree only for single-byte content.
        final int contentLength = REQUEST_CONTENT_WORKERNAME.length() + REQUEST_CONTENT_FILE.length()
                + testPDF.length + REQUEST_CONTENT_END.length();
        final String command = "POST " + target.getPath() + "pdf HTTP/1.0\r\n"
                + "Content-Type: multipart/form-data; boundary=signserver\r\n" + "Content-Length: "
                + contentLength + "\r\n" + "\r\n";
        raw.write(command.getBytes());
        raw.write(REQUEST_CONTENT_WORKERNAME.getBytes());
        raw.write(REQUEST_CONTENT_FILE.getBytes());
        raw.write(testPDF);
        raw.write(REQUEST_CONTENT_END.getBytes());
        raw.flush();

        // Read the complete response until the server closes the connection.
        InputStream in = socket.getInputStream();
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        int len = 0;
        byte[] buf = new byte[1024];
        while ((len = in.read(buf)) > 0) {
            os.write(buf, 0, len);
        }
        byte[] inbytes = os.toByteArray();

        pdfReader = new PdfReader(inbytes);
        if (!new String(pdfReader.getPageContent(1)).contains(PDF_CONTENT)) {
            System.err.println("Did not get the same document back..");
            return false;
        }
    } catch (IOException e) {
        System.err.println("testPDF.length=" + testPDF.length + "," + e.getMessage());
        //e.printStackTrace();
        return false;
    } finally {
        // Always release the reader and the connection, including on the
        // early "wrong document" return and on I/O failures.
        if (pdfReader != null) {
            try {
                pdfReader.close();
            } catch (Exception ignored) {
                // Best-effort cleanup.
            }
        }
        if (socket != null) {
            try {
                // Closing the socket also closes its input and output streams.
                socket.close();
            } catch (IOException ignored) {
                // Best-effort cleanup; the request outcome is already decided.
            }
        }
    }
    return true;
}

From source file:util.PdfUtil.java

License:Open Source License

/**
 * Concatenates the string values of selected tokens found in the content
 * stream of every page of the given PDF, in page order.
 *
 * @param reader the opened PDF to read page contents from
 * @return the collected token string values, joined without any separator
 * @throws RedbasinException if anything goes wrong while reading or
 *                           tokenising a page; the original exception is the cause
 */
public String processText(PdfReader reader) throws RedbasinException {
    final StringBuilder text = new StringBuilder();
    try {
        final int pageCount = reader.getNumberOfPages();
        // PDF page numbers are 1-based.
        int page = 1;
        while (page <= pageCount) {
            final PRTokeniser tokeniser = new PRTokeniser(reader.getPageContent(page));
            while (tokeniser.nextToken()) {
                // Token type 2 is presumably PRTokeniser's "string" token - TODO confirm.
                if (tokeniser.getTokenType() != 2) {
                    continue;
                }
                text.append(tokeniser.getStringValue());
            }
            page++;
        }
    } catch (Exception e) {
        throw new RedbasinException("Some pdf error occurred", e);
    }
    return text.toString();
}