List of usage examples for com.lowagie.text.pdf PdfStamper getMoreInfo
public HashMap getMoreInfo()
Gets the String map used to add or change values in the info dictionary. From source file: org.mnsoft.pdfocr.Wrapper.java
License:Open Source License
/**
 * Writes a copy of {@code foreground} to {@code newFile}, optionally placing the
 * matching page of {@code background} underneath each page, and sets the given
 * document-info entries on the result.
 *
 * <p>Per the original author's notes, the foreground is the original image and the
 * background is the OCR'd text. Pages are only merged when the {@code mergefiles}
 * attribute equals {@code "true"}; otherwise the foreground is copied as-is and only
 * the metadata is applied.
 *
 * <p>A background page is skipped when its content stream is not more than
 * {@code THRESHOLD} times as large (in bytes) as the foreground page's content
 * stream; the page is then assumed to have contained plain text already.
 *
 * <p>Failures are logged, not rethrown. All readers and the output stream are
 * released whether or not the merge succeeds.
 *
 * @param foreground PDF that is stamped and copied to the output
 * @param background PDF whose pages are added under the foreground pages
 * @param newFile    destination file
 * @param title      value for the "Title" info entry, or {@code null} to leave it untouched
 * @param subject    value for the "Subject" info entry, or {@code null} to leave it untouched
 * @param keywords   value for the "Keywords" info entry, or {@code null} to leave it untouched
 * @param author     value for the "Author" info entry, or {@code null} to leave it untouched
 * @param creator    value for the "Creator" info entry, or {@code null} to leave it untouched
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void mergePDFs(File foreground, File background, File newFile, String title, String subject,
        String keywords, String author, String creator) {
    log.debug("Merge " + foreground + " (FG) and " + background + " (BG) to " + newFile);
    // NOTE(review): the second argument (2) looks like the fallback when THRESHOLD is unset - confirm.
    final double threshold = ((Integer) StringUtility.StringToInteger(getAttribute("THRESHOLD"), 2))
            .doubleValue();

    PdfReader fg = null;
    PdfReader bg = null;
    FileOutputStream out = null;
    try {
        /*
         * Foreground: Original Image.
         * Background: OCR'd Text
         */
        fg = new PdfReader(foreground.getAbsolutePath());
        bg = new PdfReader(background.getAbsolutePath());

        final int fg_num_pages = fg.getNumberOfPages();
        final int bg_num_pages = bg.getNumberOfPages();
        if (fg_num_pages != bg_num_pages) {
            log.error(
                    "! Foreground and background have different number of pages. This should really not happen.");
        }

        /*
         * The output document. The stream is kept in a local so it can be
         * released in the finally block if stamping fails half-way.
         */
        out = new FileOutputStream(newFile);
        final PdfStamper fg_writer = new PdfStamper(fg, out);

        /*
         * If we're not explicitly merging, we're merging the document
         * with itself only anyway. The attribute does not change per page,
         * so it is read once.
         */
        final boolean mergeFiles = "true".equals(getAttribute("mergefiles"));

        for (int page = 1; page <= fg_num_pages; page++) {
            System.out.print(" [" + page + "]");
            if (!mergeFiles) {
                continue;
            }
            if (page > bg_num_pages) {
                // The background has no such page; skip it instead of aborting the whole merge.
                log.debug("! Not adding background for page " + page + " since the background has only "
                        + bg_num_pages + " pages.");
                continue;
            }

            final byte[] fg_page_content = fg.getPageContent(page);
            final byte[] bg_page_content = bg.getPageContent(page);
            final int fg_size = (fg_page_content == null) ? 0 : fg_page_content.length;
            final int bg_size = (bg_page_content == null) ? 0 : bg_page_content.length;

            /*
             * Modification 20130904
             *
             * Running OCR on a page that already is "pdf+text" yields overlapping
             * text that is not pixel correct. If the background content is not more
             * than threshold times as large as the foreground content, the page is
             * assumed to have been plain text already and no background is added.
             *
             * The comparison is done by multiplication in double arithmetic: this
             * avoids the truncation of int/int division and cannot divide by zero
             * when the foreground content is empty.
             */
            if (bg_size <= threshold * fg_size) {
                log.debug("! Not adding background for page " + page + " since background size (" + bg_size
                        + ") not different enough from foreground size (" + fg_size + ").");
                continue;
            }

            /* PdfImportedPage is derived from PdfTemplate, so it can be added as a template. */
            final PdfImportedPage bg_page = fg_writer.getImportedPage(bg, page);
            final PdfContentByte contentByte = fg_writer.getUnderContent(page);
            contentByte.addTemplate(bg_page, 0, 0);
        }

        /* Add or override only the info entries the caller supplied. */
        HashMap map = fg_writer.getMoreInfo();
        if (map == null) {
            map = new HashMap();
        }
        putInfoIfPresent(map, "Title", title);
        putInfoIfPresent(map, "Subject", subject);
        putInfoIfPresent(map, "Keywords", keywords);
        putInfoIfPresent(map, "Author", author);
        putInfoIfPresent(map, "Creator", creator);
        fg_writer.setMoreInfo(map);

        fg_writer.close();
        System.out.println("");
    } catch (Exception e) {
        log.error("Failed to merge " + foreground + " (FG) and " + background + " (BG) to " + newFile, e);
    } finally {
        /* Closing the stream again after a successful stamper close is harmless. */
        if (out != null) {
            try {
                out.close();
            } catch (Exception closeFailure) {
                log.debug("Could not close output stream for " + newFile + ": " + closeFailure);
            }
        }
        if (bg != null) {
            try {
                bg.close();
            } catch (Exception closeFailure) {
                log.debug("Could not close reader for " + background + ": " + closeFailure);
            }
        }
        if (fg != null) {
            try {
                fg.close();
            } catch (Exception closeFailure) {
                log.debug("Could not close reader for " + foreground + ": " + closeFailure);
            }
        }
    }
}

/** Stores {@code value} under {@code key} in {@code info} unless {@code value} is {@code null}. */
@SuppressWarnings({ "unchecked", "rawtypes" })
private static void putInfoIfPresent(HashMap info, String key, String value) {
    if (value != null) {
        info.put(key, value);
    }
}