List of usage examples for com.itextpdf.text.pdf PdfDictionary getAsDict
public PdfDictionary getAsDict(final PdfName key)
Returns the PdfObject associated with the given key as a PdfDictionary, resolving indirect references.
From source file: org.sejda.impl.itext5.component.PdfUnpacker.java
License:Open Source License
private void unpack(Set<PdfDictionary> dictionaries) throws TaskIOException { for (PdfDictionary dictionary : dictionaries) { PdfName type = dictionary.getAsName(PdfName.TYPE); if (PdfName.F.equals(type) || PdfName.FILESPEC.equals(type)) { PdfDictionary ef = dictionary.getAsDict(PdfName.EF); PdfString fn = dictionary.getAsString(PdfName.F); if (fn != null && ef != null) { PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F)); if (prs != null) { File tmpFile = copyToTemporaryFile(prs); outputWriter.addOutput(file(tmpFile).name(fn.toUnicodeString())); }//from w w w.j a v a2s . c o m } } } }
From source file:org.sejda.impl.itext5.component.PdfUnpacker.java
License:Open Source License
private Set<PdfDictionary> getEmbeddedFilesDictionaries(PdfReader reader) { Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>(); PdfDictionary catalog = reader.getCatalog(); PdfDictionary names = catalog.getAsDict(PdfName.NAMES); if (names != null) { PdfDictionary embFiles = names.getAsDict(PdfName.EMBEDDEDFILES); if (embFiles != null) { HashMap<String, PdfObject> embMap = PdfNameTree.readTree(embFiles); for (PdfObject value : embMap.values()) { retSet.add((PdfDictionary) PdfReader.getPdfObject(value)); }//from w ww . j a v a 2 s .co m } } return retSet; }
From source file:org.sejda.impl.itext5.component.PdfUnpacker.java
License:Open Source License
/**
 * Collects the {@code FS} dictionaries of every annotation whose subtype is
 * {@code FileAttachment}, scanning the {@code Annots} array of each page.
 *
 * Annotation entries that do not resolve to a dictionary (a null result or another object
 * type) are skipped instead of failing the whole scan.
 *
 * @param reader reader opened on the document to inspect
 * @return the {@code FS} dictionaries found; empty when no page has a file attachment
 */
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) {
    Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>();
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS);
        if (annots == null) {
            continue;
        }
        for (PdfObject current : annots) {
            PdfObject resolved = PdfReader.getPdfObject(current);
            // Guard against entries that resolve to null or to a non-dictionary object;
            // an unchecked cast here would throw on such input.
            if (!(resolved instanceof PdfDictionary)) {
                continue;
            }
            PdfDictionary annot = (PdfDictionary) resolved;
            if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) {
                retSet.add(annot.getAsDict(PdfName.FS));
            }
        }
    }
    return retSet;
}
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/** * Processes PDF's fonts dictionary. During the process alternative names * of Standard 14 Fonts are changed to the standard ones, provided that * the font definition doesn't include Widths array. * * Font dictionary in PDF file often includes an array of individual glyphs' widths. * Widths array is always required except for the Standard 14 Fonts, which widths * are kept by iText itself. Unfortunately, if the font uses alternative name instead of * standard one (see PDF Reference 1.7, table H.3), iText doesn't recognize the font as * one of the Standard 14 Fonts, and is unable to determine glyphs widths. In such cases * this method will change alternative names to standard ones before PDF's parsing process */// ww w . ja v a 2s . c o m private void processAlternativeFontNames(PdfDictionary resources) { if (resources == null) { return; } PdfDictionary fontsDictionary = resources.getAsDict(PdfName.FONT); if (fontsDictionary == null) { return; } for (PdfName pdfFontName : fontsDictionary.getKeys()) { if (!(fontsDictionary.get(pdfFontName) instanceof PRIndirectReference)) { return; } PRIndirectReference indRef = (PRIndirectReference) fontsDictionary.get(pdfFontName); if (!(PdfReader.getPdfObjectRelease(indRef) instanceof PdfDictionary)) { return; } PdfDictionary fontDictionary = (PdfDictionary) PdfReader.getPdfObjectRelease(indRef); PdfName baseFont = fontDictionary.getAsName(PdfName.BASEFONT); if (baseFont != null) { String fontName = PdfName.decodeName(baseFont.toString()); if (fontDictionary.getAsArray(PdfName.WIDTHS) == null && ALT_TO_STANDART_FONTS.containsKey(fontName)) { fontDictionary.put(PdfName.BASEFONT, ALT_TO_STANDART_FONTS.get(fontName)); } } } }
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
private void processAlternativeColorSpace(PdfDictionary resources) { if (resources == null) { return;//from w w w. j a v a 2 s .c om } PdfDictionary csDictionary = resources.getAsDict(PdfName.COLORSPACE); if (csDictionary == null) { return; } for (PdfName csName : csDictionary.getKeys()) { if (csDictionary.getAsArray(csName) != null) { csDictionary.put(csName, PdfName.DEVICEGRAY); } } }
From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java
License:Apache License
private static void extractMetadata(PdfReader reader, Metadata metadata) { try {/*from www . jav a 2s .c o m*/ HashMap<String, String> map = reader.getInfo(); // Clone the PDF info: for (String key : map.keySet()) { metadata.set(key.toLowerCase(), map.get(key)); } // Add other data of interest: metadata.set("pdf:version", "1." + reader.getPdfVersion()); metadata.set("pdf:numPages", "" + reader.getNumberOfPages()); metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader)); metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions()); metadata.set("pdf:encrypted", "" + reader.isEncrypted()); metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted()); metadata.set("pdf:128key", "" + reader.is128Key()); metadata.set("pdf:tampered", "" + reader.isTampered()); // Also grap XMP metadata, if present: byte[] xmpmd = reader.getMetadata(); if (xmpmd != null) { // This is standard Tika code for parsing standard stuff from the XMP: JempboxExtractor extractor = new JempboxExtractor(metadata); extractor.parse(new ByteArrayInputStream(xmpmd)); // This is custom XMP-handling code: XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd)); // There is a special class for grabbing data in the PDF schema - not sure it will add much here: // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here. 
//XMPSchemaPDF pdfxmp = xmp.getPDFSchema(); // Added a PDF/A schema class: xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class); XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class); if (pdfaxmp != null) { metadata.set("pdfaid:part", pdfaxmp.getPart()); metadata.set("pdfaid:conformance", pdfaxmp.getConformance()); String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(); //metadata.set("pdfa:version", version ); metadata.set("pdf:version", version); } } // Attempt to determine Adobe extension level: PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS); if (extensions != null) { PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE); if (adobeExt != null) { PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION); int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue(); metadata.set("pdf:version", baseVersion.toString().substring(1) + " Adobe Extension Level " + el); } } // Ensure the normalised metadata are mapped in: if (map.get("Title") != null) metadata.set(Metadata.TITLE, map.get("Title")); if (map.get("Author") != null) metadata.set(Metadata.AUTHOR, map.get("Author")); } catch (Exception e) { System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage()); e.printStackTrace(); } }