List of usage examples for com.lowagie.text.pdf PdfPageLabels getPageLabels
public static String[] getPageLabels(PdfReader reader)
From source file: de.unigoettingen.sub.commons.contentlib.pdflib.PDFCreator.java
License: Apache License
public void createPDF(OutputStream out, List<DocumentPart> metsparts, PDFConfiguration pdfconfig, MetadataExtractor inMetadataExtractor, StructureMetadataExtractor inBookmarkExtractor, Watermark myWatermark) throws ImageManagerException, FileNotFoundException, IOException, PDFManagerException, ImageInterpreterException, URISyntaxException, MetsException { PDFManager pdfmanager = null;//from ww w . j av a2 s.c o m String creator = ""; String title = ""; String keywords = ""; int documentpartcounter = 0; LinkedList<METSParser> allMetsParser = new LinkedList<METSParser>(); HashMap<Integer, UrlImage> allPages = new HashMap<Integer, UrlImage>(); HashMap<Integer, String> allPageNames = new HashMap<Integer, String>(); HashMap<Integer, PDFTitlePage> allTitlePages = new HashMap<Integer, PDFTitlePage>(); List<PDFBookmark> allRootBookmarks = new LinkedList<PDFBookmark>(); // iterate over all DocumentParts for (DocumentPart dp : metsparts) { documentpartcounter++; String pdfdivid = null; Map<Integer, UrlImage> documentpartPages = null; if (dp.getType() == DocumentPartType.METS) { // read the METS file and handle all the images // TODO: Do not invoke the METS Parser direct, use the interface instead METSParser metsparser = new METSParser(dp.getUrl(), true); // set METSParser configuration metsparser.setMetadataextractor(inMetadataExtractor); metsparser.setStructureMetadataExtractor(inBookmarkExtractor); if (dp.getMetsFileGroup() != null) { metsparser.setFilegroupsuseattributevalue(dp.getMetsFileGroup()); } if (dp.getDivid() == null) { pdfdivid = metsparser.getUppermostDivIDForPDF(); } else { pdfdivid = dp.getDivid(); } // calculate metadata inMetadataExtractor.calculatePDFMetadata(pdfdivid, metsparser); String title1 = inMetadataExtractor.getPdftitle(); String creator1 = inMetadataExtractor.getPdfcreator(); String keywords1 = inMetadataExtractor.getPdfkeywords(); if (title1 != null) { if (title.equals("")) { title = title1; } else { title = title + "; " + title1; } } if 
(creator1 != null) { if (creator.equals("")) { creator = creator1; } else { creator = creator + "; " + creator1; } } if (keywords1 != null) { if (keywords.equals("")) { keywords = keywords1; } else { keywords = keywords + "; " + keywords1; } } LOGGER.debug("Title1: " + title1); LOGGER.debug("Creator1: " + creator1); LOGGER.debug("Keywords1: " + keywords1); metsparser.getAllFilesForRelatedDivs(pdfdivid); // get page // names Map<Integer, String> myPageNames = metsparser.getPageNames(); // get list of files and pagenames documentpartPages = metsparser.getImageMap(); if (documentpartPages.isEmpty()) { // nothing in here; probably METS file has no pages // don't add METS file to list LOGGER.error("No page files / page urls available!"); } else { // change page names to make them unique // within the PDF, different METSparsers // will have same pageName for (Integer i : documentpartPages.keySet()) { UrlImage page = documentpartPages.get(i); String pagename = myPageNames.get(i); // calculate new integer int dpc = (documentpartcounter * 1000) + i; LOGGER.debug("adding page " + dpc + " to list"); // add to new HashMaps allPages.put(dpc, page); allPageNames.put(dpc, pagename); } // handle all bookmarks // need to change page number as well List<PDFBookmark> bookmarks; bookmarks = PDFBookmark.convertList(metsparser.getStructureList()); for (PDFBookmark b : bookmarks) { // change page numbers changeBookmarksPagenumber(b, documentpartcounter); allRootBookmarks.add(b); } // add METSParser to list allMetsParser.add(metsparser); } } else if (dp.getType() == DocumentPartType.PDF) { // handle the PDF part PdfReader pdfreader = new PdfReader(dp.getUrl()); int numberofpages = pdfreader.getNumberOfPages(); for (Integer i = 1; i < numberofpages + 1; i++) { PDFPage pdfpage = new PDFPage(); pdfpage.setPdfreader(pdfreader); pdfpage.setPageNumber(i); int dpc = (documentpartcounter * 1000) + (i); LOGGER.debug("adding page " + dpc + " to list"); // add page to allPages allPages.put(dpc, 
pdfpage); // adding page labales String labels[] = PdfPageLabels.getPageLabels(pdfreader); if ((labels != null) && (i < labels.length)) { LOGGER.debug("adding Page label (" + i + "):" + labels[i - 1]); allPageNames.put(dpc, labels[i - 1].substring(0, labels[i - 1].length() - 1)); } } // add Bookmarks allRootBookmarks = extractBookmarksFromPDF(pdfreader, allRootBookmarks, documentpartcounter); } // handle the title page of this DocumentPart if (dp.getTitlepage() != null) { // title page is available // set the layout of the content file // set structType if (inMetadataExtractor.getStructType() != null) { dp.getTitlepage().setStructuretype(inMetadataExtractor.getStructType()); } dp.getTitlepage().deleteTitleLines(); // set Lines if (inMetadataExtractor.getPdfTitlepageLine1() != null) { PDFTitlePageLine ptl = new PDFTitlePageLine(inMetadataExtractor.getPdfTitlepageLine1()); ptl.setContent(inMetadataExtractor.getPdfTitlepageLine1()); ptl.setLinetype(2); ptl.setFontsize(14); dp.getTitlepage().addPDFTitlePageLine(ptl); } if (inMetadataExtractor.getPdfTitlepageLine2() != null) { PDFTitlePageLine ptl = new PDFTitlePageLine(inMetadataExtractor.getPdfTitlepageLine2()); ptl.setLinetype(2); ptl.setFontsize(10); dp.getTitlepage().addPDFTitlePageLine(ptl); } if (inMetadataExtractor.getPdfTitlepageLine3() != null) { PDFTitlePageLine ptl = new PDFTitlePageLine(inMetadataExtractor.getPdfTitlepageLine3()); ptl.setLinetype(2); ptl.setFontsize(10); dp.getTitlepage().addPDFTitlePageLine(ptl); } if (inMetadataExtractor.getPdfTitlepageLine4() != null) { PDFTitlePageLine ptl = new PDFTitlePageLine(inMetadataExtractor.getPdfTitlepageLine4()); ptl.setLinetype(2); ptl.setFontsize(10); dp.getTitlepage().addPDFTitlePageLine(ptl); } // get name of the first page if (documentpartPages != null) { Map<Integer, UrlImage> sortedMap = new TreeMap<Integer, UrlImage>(documentpartPages); Iterator<Integer> it2 = sortedMap.keySet().iterator(); Integer firstpagename = 0; // TODO: GDZ: Should this just get 
the first element? - yes // I tried to find a more elegant way but my google didn't work #googleneverlikedme while (it2.hasNext()) { firstpagename = it2.next(); firstpagename = (documentpartcounter * 1000) + firstpagename; LOGGER.debug("Adding PDFTitlePage at page " + firstpagename); break; } allTitlePages.put(firstpagename, dp.getTitlepage()); } } } // end of while over all document parts // setting for PDFManager pdfmanager = new PDFManager(allPages); pdfmanager.setAlwaysUseRenderedImage(pdfconfig.isPdfDefaultAlwaysUseRenderedImage()); pdfmanager.setAlwaysCompressToJPEG(pdfconfig.isPdfDefaultAlwaysCompressToJPEG()); pdfmanager.setPdfa(pdfconfig.isWriteAsPdfA()); // set pages LOGGER.debug(allPages.size() + " pages for PDFManager set"); pdfmanager.setImageURLs(allPages); pdfmanager.setImageNames(allPageNames); pdfmanager.setStructureList(allRootBookmarks); pdfmanager.setPdftitlepages(allTitlePages); // set metadata if (!title.equals("")) { pdfmanager.setTitle(title); } if (!creator.equals("")) { pdfmanager.setCreator(creator); pdfmanager.setAuthor(creator); } if (!keywords.equals("")) { pdfmanager.setKeyword(keywords); } // set an ICC profile if (pdfconfig.getIccinputfilename() != null) { ICC_Profile iccprofile = ICC_Profile.getInstance(pdfconfig.getIccinputfilename()); pdfmanager.setIccprofile(iccprofile); } pdfmanager.createPDF(out, pdfconfig.getPagesize(), myWatermark); }