Example usage for com.lowagie.text.pdf PdfReader getPageContent

List of usage examples for com.lowagie.text.pdf PdfReader getPageContent

Introduction

On this page you can find example usages of com.lowagie.text.pdf PdfReader getPageContent.

Prototype

public byte[] getPageContent(int pageNum) throws IOException 

Source Link

Document

Gets the contents of the page.

Usage

From source file:net.sf.jsignpdf.UncompressPdf.java

License:Mozilla Public License

/**
 * Command-line entry point that rewrites each given PDF with compression switched off.
 *
 * <p>For every argument a new file is written whose name ends in
 * {@code _uncompressed.pdf}; a trailing {@code .pdf} (in any case) on the input name is
 * replaced rather than kept. A failure on one file is printed to standard error and the
 * remaining files are still processed.
 *
 * @param args paths of the PDF files to process; a usage message is printed when this is
 *             {@code null} or empty
 */
public static void main(String[] args) {
    if (args == null || args.length == 0) {
        System.out.println("Usage:\njava " + UncompressPdf.class.getName() + " file.pdf [file2.pdf [...]]");
        return;
    }
    // Static iText flag; it stays off for every document written below.
    Document.compress = false;
    for (String tmpFile : args) {
        final String newFileName;
        if (tmpFile.toLowerCase().endsWith(".pdf")) {
            newFileName = tmpFile.substring(0, tmpFile.length() - 4) + "_uncompressed.pdf";
        } else {
            newFileName = tmpFile + "_uncompressed.pdf";
        }
        System.out.println("Uncompressing " + tmpFile + " to " + newFileName);

        PdfReader reader = null;
        FileOutputStream out = null;
        try {
            // Open the input first so an unreadable source does not leave an empty output file.
            reader = new PdfReader(tmpFile);
            out = new FileOutputStream(newFileName);
            // '\0' as the version argument: keep the PDF version of the source document.
            PdfStamper stamper = new PdfStamper(reader, out, '\0');
            int total = reader.getNumberOfPages() + 1;
            for (int i = 1; i < total; i++) {
                // Feed each page's content back into the reader so it is written out anew.
                reader.setPageContent(i, reader.getPageContent(i));
            }
            stamper.close();
        } catch (NullPointerException npe) {
            npe.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the file handle even when the stamper never got to close it.
            // Closing an already-closed FileOutputStream is harmless.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // The primary failure (if any) has already been reported above.
                }
            }
            if (reader != null) {
                reader.close();
            }
        }
    }
}

From source file:org.mnsoft.pdfocr.Wrapper.java

License:Open Source License

/**
 * Writes {@code foreground} to {@code newFile}, optionally placing the matching page of
 * {@code background} underneath each page, and stores the given document metadata.
 *
 * <p>Background pages are only added when the {@code mergefiles} attribute equals
 * {@code "true"} and the background page content is more than {@code THRESHOLD}
 * (default 2) times the size of the foreground page content. Any exception is printed
 * and swallowed; the method never throws.
 *
 * @param foreground PDF with the original page images
 * @param background PDF with the OCR'd text
 * @param newFile    destination file
 * @param title      value for the "Title" info entry, or {@code null} to leave it untouched
 * @param subject    value for the "Subject" info entry, or {@code null} to leave it untouched
 * @param keywords   value for the "Keywords" info entry, or {@code null} to leave it untouched
 * @param author     value for the "Author" info entry, or {@code null} to leave it untouched
 * @param creator    value for the "Creator" info entry, or {@code null} to leave it untouched
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void mergePDFs(File foreground, File background, File newFile, String title, String subject,
        String keywords, String author, String creator) {
    log.debug("Merge " + foreground + " (FG) and " + background + " (BG) to " + newFile);

    final double threshold = ((Integer) StringUtility.StringToInteger(getAttribute("THRESHOLD"), 2))
            .doubleValue();

    PdfReader fg = null;
    PdfReader bg = null;
    FileOutputStream out = null;

    try {
        /*
         * Foreground: Original Image.
         * Background: OCR'd Text
         */
        fg = new PdfReader(foreground.getAbsolutePath());
        bg = new PdfReader(background.getAbsolutePath());

        /*
         * Count pages for foreground and background
         */
        final int fg_num_pages = fg.getNumberOfPages();
        final int bg_num_pages = bg.getNumberOfPages();

        if (fg_num_pages != bg_num_pages) {
            log.error(
                    "! Foreground and background have different number of pages. This should really not happen.");
        }

        /*
         *  The output document
         */
        out = new FileOutputStream(newFile);
        final PdfStamper fg_writer = new PdfStamper(fg, out);

        for (int i = 1; i <= fg_num_pages; i++) {
            System.out.print(" [" + i + "]");

            /*
             * If we're not explicitly merging, we're merging
             * the document with itself only anyway.
             */
            if (!"true".equals(getAttribute("mergefiles"))) {
                continue;
            }

            /*
             * The background is shorter than the foreground (already reported
             * above): there is nothing to put under this page, so keep it as is
             * instead of failing the whole merge.
             */
            if (i > bg_num_pages) {
                continue;
            }

            final int fg_size = fg.getPageContent(i).length;
            final int bg_size = bg.getPageContent(i).length;

            /*
             * Modification 20130904
             *
             * Running OCR on a page that already carries text ("pdf+text") and
             * then collating the result with the original yields overlapping
             * text that is not pixel correct and makes the PDF look bad.
             *
             * So: if the background content is not more than threshold times as
             * large as the foreground content, assume the page was plain text
             * already and do not add the background.
             *
             * The comparison is written as a multiplication in floating point so
             * that an empty foreground content stream cannot cause a division by
             * zero and the ratio is not truncated to a whole number.
             */
            if (bg_size <= threshold * fg_size) {
                log.debug("! Not adding background for page " + i + " since background size (" + bg_size
                        + ") not different enough from foreground size (" + fg_size + ").");

                continue;
            }

            /*
             * Import the background page as a template
             * (PdfImportedPage is derived from PdfTemplate)
             * and draw it beneath the existing page content.
             */
            final PdfImportedPage bg_page = fg_writer.getImportedPage(bg, i);
            final PdfContentByte contentByte = fg_writer.getUnderContent(i);

            contentByte.addTemplate(bg_page, 0, 0);
        }

        HashMap map = fg_writer.getMoreInfo();
        if (map == null) {
            map = new HashMap();
        }

        if (title != null) {
            map.put("Title", title);
        }

        if (subject != null) {
            map.put("Subject", subject);
        }

        if (keywords != null) {
            map.put("Keywords", keywords);
        }

        if (author != null) {
            map.put("Author", author);
        }

        if (creator != null) {
            map.put("Creator", creator);
        }

        fg_writer.setMoreInfo(map);

        fg_writer.close();

        System.out.println("");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        /*
         * Make sure the output file handle is released even when the stamper
         * never reached close(). Closing it a second time is harmless.
         */
        if (out != null) {
            try {
                out.close();
            } catch (Exception ignored) {
                // The primary failure (if any) has already been reported above.
            }
        }
        if (bg != null) {
            bg.close();
        }
        if (fg != null) {
            fg.close();
        }
    }
}

From source file:org.signserver.client.cli.performance.PerformanceTestPDFServlet.java

License:Open Source License

/**
 * Posts one test PDF to the servlet as a hand-built HTTP/1.0 multipart request over a
 * plain socket and checks that the response can be read as a PDF whose first page
 * content contains {@code PDF_CONTENT}.
 *
 * @param threadId identifier passed in by the caller; not used by this method
 * @return {@code true} if the expected content came back, {@code false} if it did not
 *         or if an {@link IOException} occurred
 * @see org.signserver.client.PerformanceTestTask
 */
public boolean invoke(int threadId) {
    if (startTime == 0) {
        startTime = System.currentTimeMillis();
    }
    // Pick the PDF according to how far the run has progressed. Clamp the index so a
    // call made once runTime has fully elapsed uses the last PDF instead of running
    // off the end of the list.
    final int progressIndex = (int) ((System.currentTimeMillis() - startTime) * ((long) pdfs.size())
            / runTime);
    byte[] testPDF = pdfs.get(Math.min(progressIndex, pdfs.size() - 1));

    Socket socket = null;
    try {
        final URL target = new URL(baseURLString);
        final InetAddress addr = InetAddress.getByName(target.getHost());
        // getPort() is -1 when the URL carries no explicit port; use the protocol default then.
        final int port = target.getPort() != -1 ? target.getPort() : target.getDefaultPort();
        socket = new Socket(addr, port);
        final OutputStream raw = socket.getOutputStream();

        // Encode the fixed parts once so Content-Length counts exactly the bytes sent.
        final byte[] workerNamePart = REQUEST_CONTENT_WORKERNAME.getBytes();
        final byte[] filePart = REQUEST_CONTENT_FILE.getBytes();
        final byte[] endPart = REQUEST_CONTENT_END.getBytes();
        final int contentLength = workerNamePart.length + filePart.length + testPDF.length
                + endPart.length;
        final String command = "POST " + target.getPath() + "pdf HTTP/1.0\r\n"
                + "Content-Type: multipart/form-data; boundary=signserver\r\n" + "Content-Length: "
                + contentLength + "\r\n" + "\r\n";
        raw.write(command.getBytes());
        raw.write(workerNamePart);
        raw.write(filePart);
        raw.write(testPDF);
        raw.write(endPart);
        raw.flush();

        // Read the whole response until the peer closes the connection.
        final InputStream in = socket.getInputStream();
        final ByteArrayOutputStream os = new ByteArrayOutputStream();
        final byte[] buf = new byte[1024];
        int len;
        while ((len = in.read(buf)) > 0) {
            os.write(buf, 0, len);
        }

        final PdfReader pdfReader = new PdfReader(os.toByteArray());
        boolean sameDocument;
        try {
            sameDocument = new String(pdfReader.getPageContent(1)).contains(PDF_CONTENT);
        } finally {
            pdfReader.close();
        }
        if (!sameDocument) {
            System.err.println("Did not get the same document back..");
            return false;
        }
    } catch (IOException e) {
        System.err.println("testPDF.length=" + testPDF.length + "," + e.getMessage());
        return false;
    } finally {
        // Closing the socket also closes its streams; do it on every exit path.
        if (socket != null) {
            try {
                socket.close();
            } catch (IOException ignored) {
                // Nothing useful can be done about a failed close here.
            }
        }
    }
    return true;
}

From source file:util.PdfUtil.java

License:Open Source License

/**
 * Collects the values of all tokens of one particular type from the content of every
 * page of the given PDF and returns them as a single string.
 *
 * @param reader the PDF to scan
 * @return the collected token values in page order, appended without any separator
 * @throws RedbasinException wrapping any exception raised while reading or tokenising
 */
public String processText(PdfReader reader) throws RedbasinException {
    // Token type code that is extracted; presumably PRTokeniser.TK_STRING - TODO confirm.
    final int wantedTokenType = 2;
    final StringBuilder text = new StringBuilder();
    try {
        final int pageCount = reader.getNumberOfPages();
        // Pages are numbered starting at 1.
        for (int page = 1; page <= pageCount; page++) {
            final byte[] content = reader.getPageContent(page);
            final PRTokeniser tokeniser = new PRTokeniser(content);
            while (tokeniser.nextToken()) {
                if (tokeniser.getTokenType() != wantedTokenType) {
                    continue;
                }
                text.append(tokeniser.getStringValue());
            }
        }
    } catch (Exception e) {
        throw new RedbasinException("Some pdf error occurred", e);
    }
    return text.toString();
}