Usage examples for com.lowagie.text.pdf.PdfReader.getPageContent
public byte[] getPageContent(int pageNum) throws IOException
From source file: net.sf.jsignpdf.UncompressPdf.java
License: Mozilla Public License
/**
 * Command-line entry point: for every PDF path given, writes a copy whose page
 * content streams have been re-set with compression disabled.
 *
 * <p>The output file name is the input name with {@code _uncompressed.pdf}
 * replacing a trailing {@code .pdf} (case-insensitive), or appended when the
 * input has no such suffix. A failure on one file is reported on standard error
 * and processing continues with the next file.
 *
 * @param args paths of the PDF files to process; when {@code null} or empty a
 *             usage message is printed and nothing else happens
 */
public static void main(String[] args) {
    if (args == null || args.length == 0) {
        System.out.println("Usage:\njava " + UncompressPdf.class.getName() + " file.pdf [file2.pdf [...]]");
        return;
    }

    // Global iText switch; it stays off for the remainder of the JVM's life.
    Document.compress = false;

    for (String tmpFile : args) {
        final String newFileName;
        if (tmpFile.toLowerCase().endsWith(".pdf")) {
            newFileName = tmpFile.substring(0, tmpFile.length() - 4) + "_uncompressed.pdf";
        } else {
            newFileName = tmpFile + "_uncompressed.pdf";
        }
        System.out.println("Uncompressing " + tmpFile + " to " + newFileName);

        PdfReader reader = null;
        FileOutputStream out = null;
        try {
            reader = new PdfReader(tmpFile);
            out = new FileOutputStream(newFileName);
            // NOTE(review): '\0' as the version argument presumably keeps the source
            // document's PDF version - confirm against the PdfStamper documentation.
            final PdfStamper stamper = new PdfStamper(reader, out, '\0');
            final int pageCount = reader.getNumberOfPages();
            // Pages are 1-based. Re-setting each page's content makes the stamper
            // rewrite the stream under the current (disabled) compression setting.
            for (int page = 1; page <= pageCount; page++) {
                reader.setPageContent(page, reader.getPageContent(page));
            }
            stamper.close();
        } catch (NullPointerException npe) {
            // Kept deliberately: a failure on one input must not abort the batch.
            npe.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the handles even when reading or stamping failed part-way.
            // Closing a stream the stamper has already closed is harmless.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close here.
                }
            }
            if (reader != null) {
                reader.close();
            }
        }
    }
}
From source file: org.mnsoft.pdfocr.Wrapper.java
License: Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" }) private void mergePDFs(File foreground, File background, File newFile, String title, String subject, String keywords, String author, String creator) { log.debug("Merge " + foreground + " (FG) and " + background + " (BG) to " + newFile); final double threshold = ((Integer) StringUtility.StringToInteger(getAttribute("THRESHOLD"), 2)) .doubleValue();//from ww w. j av a2s.co m try { /* * Foreground: Original Image. * Background: OCR'd Text */ final PdfReader fg = new PdfReader(foreground.getAbsolutePath()); final PdfReader bg = new PdfReader(background.getAbsolutePath()); /* * Count pages for foreground and background */ final int fg_num_pages = fg.getNumberOfPages(); final int bg_num_pages = bg.getNumberOfPages(); if (fg_num_pages != bg_num_pages) { log.error( "! Foreground and background have different number of pages. This should really not happen."); } /* * The output document */ final PdfStamper fg_writer = new PdfStamper(fg, new FileOutputStream(newFile)); /* * Create a PdfTemplate from the first page of mark * (PdfImportedPage is derived from PdfTemplate) */ PdfImportedPage bg_page = null; for (int i = 0; i < fg_num_pages;) { ++i; System.out.print(" [" + i + "]"); final byte[] fg_page_content = fg.getPageContent(i); final byte[] bg_page_content = bg.getPageContent(i); final int bg_size = bg_page_content.length; final int fg_size = fg_page_content.length; /* * If we're not explicitly merging, we're merging * the document with itself only anyway. */ if (!"true".equals(getAttribute("mergefiles"))) { continue; } /* * Modification 20130904 * * We want to scan only what's not been generated by a number of * generators. So, until now, the generator of whom we wanted to * ignore files was ocr, i.e. the one we set ourselves. Now, we * have seen that when we run an OCR on a "pdf+text" file, as we * collate in post the file with its image, we get an overlapping * text which is not pixel correct, i.e. 
which makes the PDF appear * not nicely. * * If the background image is not at least threshold times as large as * the foreground image, we assume we've been working on a * page that was plain text already, and don't add the image * to the background. */ if ((bg_size / fg_size) <= threshold) { log.debug("! Not adding background for page " + i + " since background size (" + bg_size + ") not different enough from foreground size (" + fg_size + ")."); continue; } bg_page = fg_writer.getImportedPage(bg, i); final PdfContentByte contentByte = fg_writer.getUnderContent(i); contentByte.addTemplate(bg_page, 0, 0); } HashMap map = fg_writer.getMoreInfo(); if (map == null) { map = new HashMap(); } if (title != null) { map.put("Title", title); } if (subject != null) { map.put("Subject", subject); } if (keywords != null) { map.put("Keywords", keywords); } if (author != null) { map.put("Author", author); } if (creator != null) { map.put("Creator", creator); } fg_writer.setMoreInfo(map); fg_writer.close(); System.out.println(""); } catch (Exception e) { e.printStackTrace(); } }
From source file: org.signserver.client.cli.performance.PerformanceTestPDFServlet.java
License: Open Source License
/** @see org.signserver.client.PerformanceTestTask */ public boolean invoke(int threadId) { if (startTime == 0) { startTime = System.currentTimeMillis(); }/* w w w . j a v a 2 s .co m*/ byte[] testPDF = pdfs .get((int) ((System.currentTimeMillis() - startTime) * ((long) pdfs.size()) / runTime)); URL target; try { target = new URL(baseURLString); InetAddress addr = InetAddress.getByName(target.getHost()); Socket socket = new Socket(addr, target.getPort()); OutputStream raw = socket.getOutputStream(); final int contentLength = REQUEST_CONTENT_WORKERNAME.length() + REQUEST_CONTENT_FILE.length() + testPDF.length + REQUEST_CONTENT_END.length(); final String command = "POST " + target.getPath() + "pdf HTTP/1.0\r\n" + "Content-Type: multipart/form-data; boundary=signserver\r\n" + "Content-Length: " + contentLength + "\r\n" + "\r\n"; raw.write(command.getBytes()); raw.write(REQUEST_CONTENT_WORKERNAME.getBytes()); raw.write(REQUEST_CONTENT_FILE.getBytes()); raw.write(testPDF); raw.write(REQUEST_CONTENT_END.getBytes()); raw.flush(); InputStream in = socket.getInputStream(); ByteArrayOutputStream os = new ByteArrayOutputStream(); int len = 0; byte[] buf = new byte[1024]; while ((len = in.read(buf)) > 0) { os.write(buf, 0, len); } in.close(); os.close(); byte[] inbytes = os.toByteArray(); PdfReader pdfReader = new PdfReader(inbytes); if (!new String(pdfReader.getPageContent(1)).contains(PDF_CONTENT)) { System.err.println("Did not get the same document back.."); return false; } pdfReader.close(); raw.close(); socket.close(); } catch (IOException e) { System.err.println("testPDF.length=" + testPDF.length + "," + e.getMessage()); //e.printStackTrace(); return false; } return true; }
From source file: util.PdfUtil.java
License: Open Source License
public String processText(PdfReader reader) throws RedbasinException { //Reads in the pdf Template StringBuffer sb = new StringBuffer(); try {//from w w w. j a va2 s .com //System.out.println("Number of pages = " + reader.getNumberOfPages()); int numPages = reader.getNumberOfPages(); for (int i = 1; i <= numPages; i++) { byte[] b = reader.getPageContent(i); PRTokeniser token = new PRTokeniser(b); //System.out.println("Page " + i); while (token.nextToken()) { if (token.getTokenType() == 2) { sb.append(token.getStringValue()); //System.out.print(token.getStringValue() + " "); } } //System.out.println(); } /* PdfStamper stamp = new PdfStamper(reader, new FileOutputStream("aNewPDF.pdf")); AcroFields form = stamp.getAcroFields(); */ /* Map fields = reader.getAcroFields().getFields(); Iterator iter = fields.keySet().iterator(); System.out.println("Printing fields" + fields.size()); while (iter.hasNext()) { Object fobj = iter.next(); Object fval = fields.get(fobj); System.out.println("Field = " + fobj.toString() + "Value = " + fobj.toString()); } */ //set the field values in the pdf form /* form.setField("fieldName", "aValue"); stamp.setFormFlattening(true); stamp.close(); * */ } catch (Exception e) { throw new RedbasinException("Some pdf error occurred", e); } return sb.toString(); }