List of usage examples for com.lowagie.text.pdf PdfStamper getMoreInfo
public HashMap getMoreInfo()
String
map to add or change values in the info dictionary. From source file:org.mnsoft.pdfocr.Wrapper.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" }) private void mergePDFs(File foreground, File background, File newFile, String title, String subject, String keywords, String author, String creator) { log.debug("Merge " + foreground + " (FG) and " + background + " (BG) to " + newFile); final double threshold = ((Integer) StringUtility.StringToInteger(getAttribute("THRESHOLD"), 2)) .doubleValue();//from ww w . j ava2 s.co m try { /* * Foreground: Original Image. * Background: OCR'd Text */ final PdfReader fg = new PdfReader(foreground.getAbsolutePath()); final PdfReader bg = new PdfReader(background.getAbsolutePath()); /* * Count pages for foreground and background */ final int fg_num_pages = fg.getNumberOfPages(); final int bg_num_pages = bg.getNumberOfPages(); if (fg_num_pages != bg_num_pages) { log.error( "! Foreground and background have different number of pages. This should really not happen."); } /* * The output document */ final PdfStamper fg_writer = new PdfStamper(fg, new FileOutputStream(newFile)); /* * Create a PdfTemplate from the first page of mark * (PdfImportedPage is derived from PdfTemplate) */ PdfImportedPage bg_page = null; for (int i = 0; i < fg_num_pages;) { ++i; System.out.print(" [" + i + "]"); final byte[] fg_page_content = fg.getPageContent(i); final byte[] bg_page_content = bg.getPageContent(i); final int bg_size = bg_page_content.length; final int fg_size = fg_page_content.length; /* * If we're not explicitly merging, we're merging * the document with itself only anyway. */ if (!"true".equals(getAttribute("mergefiles"))) { continue; } /* * Modification 20130904 * * We want to scan only what's not been generated by a number of * generators. So, until now, the generator of whom we wanted to * ignore files was ocr, i.e. the one we set ourselves. Now, we * have seen that when we run an OCR on a "pdf+text" file, as we * collate in post the file with its image, we get an overlapping * text which is not pixel correct, i.e. which makes the PDF appear * not nicely. * * If the background image is not at least threshold times as large as * the foreground image, we assume we've been working on a * page that was plain text already, and don't add the image * to the background. */ if ((bg_size / fg_size) <= threshold) { log.debug("! Not adding background for page " + i + " since background size (" + bg_size + ") not different enough from foreground size (" + fg_size + ")."); continue; } bg_page = fg_writer.getImportedPage(bg, i); final PdfContentByte contentByte = fg_writer.getUnderContent(i); contentByte.addTemplate(bg_page, 0, 0); } HashMap map = fg_writer.getMoreInfo(); if (map == null) { map = new HashMap(); } if (title != null) { map.put("Title", title); } if (subject != null) { map.put("Subject", subject); } if (keywords != null) { map.put("Keywords", keywords); } if (author != null) { map.put("Author", author); } if (creator != null) { map.put("Creator", creator); } fg_writer.setMoreInfo(map); fg_writer.close(); System.out.println(""); } catch (Exception e) { e.printStackTrace(); } }