List of usage examples for com.itextpdf.text.pdf PdfDictionary getAsName

public PdfName getAsName(final PdfName key)

Returns a PdfObject as a PdfName, resolving indirect references.
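Before the per-project examples, a minimal, self-contained sketch of the call in isolation. The input file name and the choice of key are illustrative assumptions, not taken from any example below:

    import com.itextpdf.text.pdf.PdfDictionary;
    import com.itextpdf.text.pdf.PdfName;
    import com.itextpdf.text.pdf.PdfReader;

    public class GetAsNameSketch {
        public static void main(String[] args) throws Exception {
            PdfReader reader = new PdfReader("input.pdf"); // hypothetical input file
            PdfDictionary pageDict = reader.getPageN(1);
            // getAsName() resolves a possible indirect reference and returns null
            // when the key is absent or its value is not a name object.
            PdfName type = pageDict.getAsName(PdfName.TYPE); // typically /Page
            System.out.println(type);
            reader.close();
        }
    }

Because getAsName() returns null rather than throwing when the value is missing or of the wrong type, callers normally null-check the result before comparing it with PdfName constants.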
From source file: de.gbv.marginalia.Annotation.java
License: Open Source License
    /**
     * Constructs a new Annotation from a given PdfDictionary.
     * Of course the PdfDictionary should contain an annotation.
     */
    // public Annotation(PdfDictionary annot) { this(annot, 0); }
    public Annotation(PdfDictionary annot, int pageNum) {
        this.pageNum = pageNum;
        this.subtype = annot.getAsName(PdfName.SUBTYPE);
        // text | caret | freetext | fileattachment | highlight | ink | line | link | circle | square |
        // polygon | polyline | sound | squiggly | stamp | strikeout | underline
        this.content = annot.getAsString(PdfName.CONTENTS);
        this.popup = getAsDictionary(annot, PdfName.POPUP);
        // TODO: skipped fields:
        // getAsDictionary(annot, PdfName.AP); // alternative to coords!
        // annot.getAsName(PdfName.AS);
        // getAsDictionary(annot, PdfName.A); // action
        // getAsDictionary(annot, PdfName.A); // additional action
        // annot.getAsNumber(PdfName.STRUCTPARENT);
        // Since PDF 1.5:
        // RC = contents-richtext
        this.dict = annot;
    }
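The constructor only wraps a dictionary it is handed. A hedged sketch of how it might be driven from a PdfReader; the file name, the loop, and the variable names are assumptions, and only the Annotation class comes from the file above:

    PdfReader reader = new PdfReader("annotated.pdf"); // hypothetical input
    for (int page = 1; page <= reader.getNumberOfPages(); page++) {
        PdfArray annots = reader.getPageN(page).getAsArray(PdfName.ANNOTS);
        if (annots == null) {
            continue; // page has no annotations
        }
        for (int i = 0; i < annots.size(); i++) {
            PdfDictionary annotDict = annots.getAsDict(i); // resolves indirect references
            if (annotDict != null) {
                Annotation annotation = new Annotation(annotDict, page);
                // ... work with the wrapped annotation ...
            }
        }
    }
    reader.close();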
From source file: mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License: Open Source License
    /**
     * Extracts locations from the redact annotations contained in the document and applied to the given page.
     */
    private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
        List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

        if (pageDict.contains(PdfName.ANNOTS)) {
            PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

            for (int i = 0; i < annotsArray.size(); ++i) {
                PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
                PdfDictionary annotDict = annotsArray.getAsDict(i);
                PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE);

                if (annotSubtype.equals(PdfName.REDACT)) {
                    saveRedactAnnotIndirRef(page, annotIndirRef.toString());
                    locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
                }
            }
        }

        return locations;
    }
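A sketch of how this helper might be called for a whole document, assuming the enclosing class holds a PdfReader field named reader; that field and the loop are assumptions, not part of the source above:

    // Hypothetical caller inside the same class.
    List<PdfCleanUpLocation> allLocations = new ArrayList<PdfCleanUpLocation>();
    for (int page = 1; page <= reader.getNumberOfPages(); page++) {
        PdfDictionary pageDict = reader.getPageN(page); // page numbers are 1-based in iText
        allLocations.addAll(extractLocationsFromRedactAnnots(page, pageDict));
    }

Note that the method compares the /Subtype entry against PdfName.REDACT, so only annotations explicitly marked as redaction annotations contribute clean-up locations.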
From source file: org.sejda.impl.itext5.component.PdfUnpacker.java
License: Open Source License
    private void unpack(Set<PdfDictionary> dictionaries) throws TaskIOException {
        for (PdfDictionary dictionary : dictionaries) {
            PdfName type = dictionary.getAsName(PdfName.TYPE);
            if (PdfName.F.equals(type) || PdfName.FILESPEC.equals(type)) {
                PdfDictionary ef = dictionary.getAsDict(PdfName.EF);
                PdfString fn = dictionary.getAsString(PdfName.F);
                if (fn != null && ef != null) {
                    PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F));
                    if (prs != null) {
                        File tmpFile = copyToTemporaryFile(prs);
                        outputWriter.addOutput(file(tmpFile).name(fn.toUnicodeString()));
                    }
                }
            }
        }
    }
From source file: org.sejda.impl.itext5.component.PdfUnpacker.java
License: Open Source License
    private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) {
        Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>();
        for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
            PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS);
            if (annots != null) {
                for (PdfObject current : annots) {
                    PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(current);
                    if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) {
                        retSet.add(annot.getAsDict(PdfName.FS));
                    }
                }
            }
        }
        return retSet;
    }
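Taken together with unpack() above, a plausible call sequence for this class might look like the following; the reader setup is an assumption, and only the two private methods come from PdfUnpacker:

    PdfReader reader = new PdfReader("with-attachments.pdf"); // hypothetical input
    Set<PdfDictionary> attachments = getFileAttachmentsDictionaries(reader);
    // Each returned dictionary is the annotation's /FS file specification; unpack()
    // follows its /EF /F embedded-file stream and hands the bytes to the configured outputWriter.
    unpack(attachments);
    reader.close();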
From source file: pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License: Open Source License
    /**
     * Processes the PDF's fonts dictionary. During the process, alternative names
     * of the Standard 14 Fonts are changed to the standard ones, provided that
     * the font definition doesn't include a Widths array.
     *
     * A font dictionary in a PDF file often includes an array of individual glyph widths.
     * The Widths array is always required except for the Standard 14 Fonts, whose widths
     * are kept by iText itself. Unfortunately, if the font uses an alternative name instead of
     * the standard one (see PDF Reference 1.7, table H.3), iText doesn't recognize the font as
     * one of the Standard 14 Fonts and is unable to determine glyph widths. In such cases
     * this method changes alternative names to standard ones before the PDF parsing process.
     */
    private void processAlternativeFontNames(PdfDictionary resources) {
        if (resources == null) {
            return;
        }
        PdfDictionary fontsDictionary = resources.getAsDict(PdfName.FONT);
        if (fontsDictionary == null) {
            return;
        }
        for (PdfName pdfFontName : fontsDictionary.getKeys()) {
            if (!(fontsDictionary.get(pdfFontName) instanceof PRIndirectReference)) {
                return;
            }
            PRIndirectReference indRef = (PRIndirectReference) fontsDictionary.get(pdfFontName);
            if (!(PdfReader.getPdfObjectRelease(indRef) instanceof PdfDictionary)) {
                return;
            }
            PdfDictionary fontDictionary = (PdfDictionary) PdfReader.getPdfObjectRelease(indRef);
            PdfName baseFont = fontDictionary.getAsName(PdfName.BASEFONT);
            if (baseFont != null) {
                String fontName = PdfName.decodeName(baseFont.toString());
                if (fontDictionary.getAsArray(PdfName.WIDTHS) == null
                        && ALT_TO_STANDART_FONTS.containsKey(fontName)) {
                    fontDictionary.put(PdfName.BASEFONT, ALT_TO_STANDART_FONTS.get(fontName));
                }
            }
        }
    }
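A sketch of how such a method might be applied before character extraction, assuming a PdfReader named reader and a pass over every page's resource dictionary; the loop itself is an assumption, while getPageN() and PdfName.RESOURCES are standard iText 5 API:

    for (int page = 1; page <= reader.getNumberOfPages(); page++) {
        PdfDictionary resources = reader.getPageN(page).getAsDict(PdfName.RESOURCES);
        processAlternativeFontNames(resources); // null resources are handled inside the method
    }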
From source file: uk.bl.wa.tika.parser.pdf.itext.PDFParser.java
License: Apache License
    private static void extractMetadata(PdfReader reader, Metadata metadata) {
        try {
            HashMap<String, String> map = reader.getInfo();
            // Clone the PDF info:
            for (String key : map.keySet()) {
                metadata.set(key.toLowerCase(), map.get(key));
            }
            // Add other data of interest:
            metadata.set("pdf:version", "1." + reader.getPdfVersion());
            metadata.set("pdf:numPages", "" + reader.getNumberOfPages());
            metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader));
            metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions());
            metadata.set("pdf:encrypted", "" + reader.isEncrypted());
            metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted());
            metadata.set("pdf:128key", "" + reader.is128Key());
            metadata.set("pdf:tampered", "" + reader.isTampered());
            // Also grab XMP metadata, if present:
            byte[] xmpmd = reader.getMetadata();
            if (xmpmd != null) {
                // This is standard Tika code for parsing standard stuff from the XMP:
                JempboxExtractor extractor = new JempboxExtractor(metadata);
                extractor.parse(new ByteArrayInputStream(xmpmd));
                // This is custom XMP-handling code:
                XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd));
                // There is a special class for grabbing data in the PDF schema - not sure it will add much here:
                // Could parse xmp:CreatorTool and pdf:Producer etc. out of here.
                //XMPSchemaPDF pdfxmp = xmp.getPDFSchema();
                // Added a PDF/A schema class:
                xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
                XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
                if (pdfaxmp != null) {
                    metadata.set("pdfaid:part", pdfaxmp.getPart());
                    metadata.set("pdfaid:conformance", pdfaxmp.getConformance());
                    String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase();
                    //metadata.set("pdfa:version", version);
                    metadata.set("pdf:version", version);
                }
            }
            // Attempt to determine Adobe extension level:
            PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS);
            if (extensions != null) {
                PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE);
                if (adobeExt != null) {
                    PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION);
                    int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue();
                    metadata.set("pdf:version",
                            baseVersion.toString().substring(1) + " Adobe Extension Level " + el);
                }
            }
            // Ensure the normalised metadata are mapped in:
            if (map.get("Title") != null)
                metadata.set(Metadata.TITLE, map.get("Title"));
            if (map.get("Author") != null)
                metadata.set(Metadata.AUTHOR, map.get("Author"));
        } catch (Exception e) {
            System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage());
            e.printStackTrace();
        }
    }
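A minimal sketch of calling this extractor, assuming an already opened PdfReader and a fresh org.apache.tika.metadata.Metadata instance; the input file name is an assumption:

    PdfReader reader = new PdfReader("document.pdf"); // hypothetical input
    Metadata metadata = new Metadata();
    extractMetadata(reader, metadata);
    for (String name : metadata.names()) {
        System.out.println(name + " = " + metadata.get(name));
    }
    reader.close();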