List of usage examples for com.itextpdf.text.pdf PdfName toString
public String toString()
Returns: the String representation of this PdfObject.
From source file: de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java
License:Open Source License
@Override public ArrayList getAttachments() throws IOException { ArrayList files = new ArrayList(); PdfReader reader = new PdfReader(conf.getPDFFile()); PdfDictionary root = reader.getCatalog(); PdfDictionary documentnames = root.getAsDict(PdfName.NAMES); PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES); PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES); PdfDictionary filespec;//from w ww . j a v a 2 s . c o m PdfDictionary refs; for (int i = 0; i < filespecs.size();) { filespecs.getAsName(i++); filespec = filespecs.getAsDict(i++); refs = filespec.getAsDict(PdfName.EF); Iterator it = refs.getKeys().iterator(); while (it.hasNext()) { PdfName key = (PdfName) it.next(); if (key.toString().equals("/F")) { String filename = "-"; String desc = "-"; int size = -1; String moddate = "-"; String compsize = "-"; PdfObject pdfobj = null; try { filename = filespec.getAsString(key).toString(); } catch (Exception e) { log.warn("Cannot load attachment-name - " + e.getMessage()); } try { desc = filespec.getAsString(PdfName.DESC).toString(); } catch (Exception e) { log.warn("Cannot load attachment-description - " + e.getMessage()); } byte[] attBytes = null; try { PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key)); attBytes = PdfReader.getStreamBytes(stream); size = attBytes.length; } catch (Exception e) { log.warn("Cannot load attachment-size - " + e.getMessage()); } try { pdfobj = PdfReader.getPdfObject(refs.getAsIndirectObject(key)); } catch (Exception e) { log.warn("Cannot load attachment-pdfobject - " + e.getMessage()); } Hashtable fileData = new Hashtable(); fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description fileData.put(ATTACHMENT_SIZE_INT, size); //size fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes files.add(fileData); } } } return files; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/**
 * Reads the number that follows the last occurrence of {@code XOBJ_NAME_PREFIX} in the
 * decoded form of an XObject name.
 *
 * @param xobjName the XObject name to inspect
 * @return the parsed number, or {@code 0} when the decoded name does not contain the prefix
 * @throws NumberFormatException if the text after the prefix is not a parsable integer
 */
private int getXObjNum(PdfName xobjName) {
    final String decoded = PdfName.decodeName(xobjName.toString());
    final int prefixAt = decoded.lastIndexOf(XOBJ_NAME_PREFIX);
    if (prefixAt < 0) {
        return 0;
    }
    final int numberStart = prefixAt + XOBJ_NAME_PREFIX.length();
    return Integer.parseInt(decoded.substring(numberStart));
}
From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java
License:Open Source License
/**
 * Processes PDF's fonts dictionary. During the process alternative names
 * of Standard 14 Fonts are changed to the standard ones, provided that
 * the font definition doesn't include a Widths array.
 *
 * <p>A font dictionary in a PDF file often includes an array of individual glyph widths.
 * The Widths array is always required except for the Standard 14 Fonts, whose widths
 * are kept by iText itself. Unfortunately, if the font uses an alternative name instead of
 * the standard one (see PDF Reference 1.7, table H.3), iText doesn't recognize the font as
 * one of the Standard 14 Fonts and is unable to determine glyph widths. In such cases
 * this method changes alternative names to standard ones before the PDF parsing process.
 *
 * <p>Entries that are not indirect references, or that do not resolve to a dictionary, are
 * skipped individually; the remaining fonts are still processed.
 *
 * @param resources the resources dictionary whose {@code /Font} entry is inspected;
 *                  {@code null} is accepted and ignored
 */
private void processAlternativeFontNames(PdfDictionary resources) {
    if (resources == null) {
        return;
    }
    PdfDictionary fontsDictionary = resources.getAsDict(PdfName.FONT);
    if (fontsDictionary == null) {
        return;
    }
    for (PdfName pdfFontName : fontsDictionary.getKeys()) {
        Object fontEntry = fontsDictionary.get(pdfFontName);
        if (!(fontEntry instanceof PRIndirectReference)) {
            // One unsuitable entry must not stop the remaining fonts from being fixed.
            continue;
        }
        Object resolved = PdfReader.getPdfObjectRelease((PRIndirectReference) fontEntry);
        if (!(resolved instanceof PdfDictionary)) {
            continue;
        }
        PdfDictionary fontDictionary = (PdfDictionary) resolved;
        PdfName baseFont = fontDictionary.getAsName(PdfName.BASEFONT);
        if (baseFont == null) {
            continue;
        }
        String fontName = PdfName.decodeName(baseFont.toString());
        // Only rename when no explicit widths are present; otherwise the font is usable as it is.
        if (fontDictionary.getAsArray(PdfName.WIDTHS) == null
                && ALT_TO_STANDART_FONTS.containsKey(fontName)) {
            fontDictionary.put(PdfName.BASEFONT, ALT_TO_STANDART_FONTS.get(fontName));
        }
    }
}
From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java
License:Apache License
private static void extractMetadata(PdfReader reader, Metadata metadata) { try {// w ww. j a va 2 s . co m HashMap<String, String> map = reader.getInfo(); // Clone the PDF info: for (String key : map.keySet()) { metadata.set(key.toLowerCase(), map.get(key)); } // Add other data of interest: metadata.set("pdf:version", "1." + reader.getPdfVersion()); metadata.set("pdf:numPages", "" + reader.getNumberOfPages()); metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader)); metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions()); metadata.set("pdf:encrypted", "" + reader.isEncrypted()); metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted()); metadata.set("pdf:128key", "" + reader.is128Key()); metadata.set("pdf:tampered", "" + reader.isTampered()); // Also grap XMP metadata, if present: byte[] xmpmd = reader.getMetadata(); if (xmpmd != null) { // This is standard Tika code for parsing standard stuff from the XMP: JempboxExtractor extractor = new JempboxExtractor(metadata); extractor.parse(new ByteArrayInputStream(xmpmd)); // This is custom XMP-handling code: XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd)); // There is a special class for grabbing data in the PDF schema - not sure it will add much here: // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here. 
//XMPSchemaPDF pdfxmp = xmp.getPDFSchema(); // Added a PDF/A schema class: xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class); XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class); if (pdfaxmp != null) { metadata.set("pdfaid:part", pdfaxmp.getPart()); metadata.set("pdfaid:conformance", pdfaxmp.getConformance()); String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(); //metadata.set("pdfa:version", version ); metadata.set("pdf:version", version); } } // Attempt to determine Adobe extension level: PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS); if (extensions != null) { PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE); if (adobeExt != null) { PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION); int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue(); metadata.set("pdf:version", baseVersion.toString().substring(1) + " Adobe Extension Level " + el); } } // Ensure the normalised metadata are mapped in: if (map.get("Title") != null) metadata.set(Metadata.TITLE, map.get("Title")); if (map.get("Author") != null) metadata.set(Metadata.AUTHOR, map.get("Author")); } catch (Exception e) { System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage()); e.printStackTrace(); } }