List of usage examples for com.lowagie.text.pdf PdfDictionary get
public PdfObject get(PdfName key)
Returns the PdfObject associated with the specified key.
From source file:com.cyberway.issue.crawler.extractor.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/* w ww . j a v a2 s.c o m*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) { this.extractURIs((PdfObject) objectList.next()); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:crawler.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*from ww w . j av a 2 s . com*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) foundURIs.add(value.toString()); else this.extractURIs(value); } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) this.extractURIs((PdfObject) objectList.next()); // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) return; // note that we've seen it if it's new else markAsSeen(indirect.getGeneration(), indirect.getNumber()); // dereference the "pointer" and process the object indirect.getReader(); PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License:LGPL
public ArrayList<byte[]> getImages(int pageNumber) throws Exception { ArrayList<byte[]> arrayListPageImages = new ArrayList<byte[]>(); if (pdfReader != null) { int numberOfPages = pdfReader.getNumberOfPages(); if (pageNumber > 0 && pageNumber <= numberOfPages) { PdfDictionary pdfDictionary = pdfReader.getPageN(pageNumber); if (pdfDictionary != null) { //PdfDictionary pdfDictionaryResources = (PdfDictionary)pdfDictionary.get(PdfName.RESOURCES); PdfDictionary pdfDictionaryResources = (PdfDictionary) PdfReader .getPdfObject(pdfDictionary.get(PdfName.RESOURCES)); PdfDictionary pdfDictionaryXObjects = (PdfDictionary) pdfDictionaryResources .get(PdfName.XOBJECT); if (pdfDictionaryXObjects != null) { //Set myKeySet = ; //pdfDictionaryXObjects.getKeys(). PdfName pdfObjectSubType = null; for (Object pdfKeyObject : pdfDictionaryXObjects.getKeys()) { PdfObject pdfObject = pdfDictionaryXObjects.get((PdfName) pdfKeyObject); if (pdfObject.isIndirect()) { // Eventually check if pdfObject.isDictionary()...we skipped that here PdfDictionary innerPdfDictionary = (PdfDictionary) PdfReader .getPdfObject(pdfObject); if (innerPdfDictionary.isStream()) { extractImageFromPdfObjectExperimental(null, pageNumber, innerPdfDictionary); } else { //PdfName pdfObjectSubType = (PdfName)PdfReader.getPdfObject(innerPdfDictionary.get(PdfName.SUBTYPE)); pdfObjectSubType = (PdfName) PdfReader .getPdfObject(innerPdfDictionary.get(PdfName.SUBTYPE)); /* * Check if the sub-type is an "IMAGE" and * then get the actual innerPdfObject for * the image extraction code */ if (PdfName.IMAGE.equals(pdfObjectSubType)) { PdfObject innerPdfObject = pdfReader .getPdfObject(pdfObject.getIndRef().getNumber()); extractImageFromPdfObjectExperimental(null, pageNumber, innerPdfObject); }//from w w w .j a va2 s . c om } } // end if checking 'pdfObject' is indirect } // end for } // end if checking 'XObject' } // end if checking 'PdfDictionary' } // end if checking page number } else { // TODO: Add own exception. 
throw new Exception("There is no open PDF to work with."); } // end if..else return arrayListPageImages; }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License:LGPL
@SuppressWarnings("unchecked") public java.util.List getBookmarkTitlesAsText() { java.util.List bookmarkContent = null; if (pdfReader != null) { //bookmarkContent = SimpleBookmark.getBookmark(pdfReader); PdfDictionary catalog = pdfReader.getCatalog(); if (catalog != null) { PdfObject rootPdfObject = PdfReader.getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); if (rootPdfObject != null && rootPdfObject.isDictionary()) { PdfDictionary rootOutlinesPdfDictionary = (PdfDictionary) rootPdfObject; /*/* ww w. j a v a 2 s . c o m*/ * If it doesn't exist create the List and populate it, * otherwise just return the already existing List. */ if (bookmarkTextList == null) { bookmarkTextList = new ArrayList<String>(); // Populate the List populateBookmarkTextList(rootOutlinesPdfDictionary, ""); } // end if } } // end if } return bookmarkContent; }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License:LGPL
/** * This method will populate the text bookmark list. * /*from w ww . j a v a2 s .c o m*/ * @param rootOutlinesPdfDictionary The node element for the bookmark item. * @param indentionString The base indention string to be used. */ @SuppressWarnings("unchecked") private void populateBookmarkTextList(PdfDictionary rootOutlinesPdfDictionary, String indentionString) { PdfDictionary outlineItemPdfDictionary = (PdfDictionary) PdfReader .getPdfObjectRelease(rootOutlinesPdfDictionary.get(PdfName.FIRST)); while (outlineItemPdfDictionary != null) { PdfString bookmarkTitle = (PdfString) PdfReader .getPdfObjectRelease(outlineItemPdfDictionary.get(PdfName.TITLE)); bookmarkTextList.add(indentionString + bookmarkTitle.toUnicodeString()); logger.trace(indentionString + bookmarkTitle.toUnicodeString()); /* * Recursive call to fill List */ populateBookmarkTextList(outlineItemPdfDictionary, indentionString + bookmarkIndentionString()); /* * Get next outline item */ outlineItemPdfDictionary = (PdfDictionary) PdfReader .getPdfObjectRelease(outlineItemPdfDictionary.get(PdfName.NEXT)); } // end while }
From source file:es.gob.afirma.signers.pades.ltv.PdfDocumentSecurityStore.java
License:Open Source License
/** Constructor para inicializar la clase con una estructura DSS ya creada. * @param dss Diccionario DSS/*from www. j av a 2 s. c o m*/ * @throws IOException En caso de errores de entrada / salida */ PdfDocumentSecurityStore(final PdfDictionary dss) throws IOException { int i = 0; PdfArray arrayCerts = dss.getAsArray(new PdfName(PDF_NAME_CERTS)); if (arrayCerts != null) { for (final Iterator<PdfObject> iterator = arrayCerts.listIterator(); iterator.hasNext();) { final PdfIndirectReference reference = (PdfIndirectReference) iterator.next(); this.certificates.put(Integer.valueOf(i), getContentBytesFromContentObject(PdfReader.getPdfObject(reference))); i++; } } PdfArray arrayOcsps = dss.getAsArray(new PdfName(PDF_NAME_OCSPS)); if (arrayOcsps != null) { i = 0; for (final Iterator<PdfObject> iterator = arrayOcsps.listIterator(); iterator.hasNext();) { final PdfIndirectReference reference = (PdfIndirectReference) iterator.next(); this.ocsps.put(Integer.valueOf(i), getContentBytesFromContentObject(PdfReader.getPdfObject(reference))); i++; } } PdfArray arrayCrls = dss.getAsArray(new PdfName(PDF_NAME_CRLS)); if (arrayCrls != null) { i = 0; for (final Iterator<PdfObject> iterator = arrayCrls.listIterator(); iterator.hasNext();) { final PdfIndirectReference reference = (PdfIndirectReference) iterator.next(); this.crls.put(Integer.valueOf(i), getContentBytesFromContentObject(PdfReader.getPdfObject(reference))); i++; } } final PdfDictionary vri = dss.getAsDict(new PdfName(PDF_NAME_VRI)); if (vri != null) { PdfName key; ValidationInformation val; for (final Iterator<PdfName> iterator = vri.getKeys().iterator(); iterator.hasNext(); this.signatures .put(key.toString().substring(1), val)) { key = iterator.next(); final PdfDictionary vriEntry = vri.getAsDict(key); arrayCerts = vriEntry.getAsArray(new PdfName(PDF_NAME_CERT)); int certId[]; if (arrayCerts != null) { certId = new int[arrayCerts.size()]; for (i = 0; i < arrayCerts.size(); i++) { final PdfIndirectReference reference = 
(PdfIndirectReference) arrayCerts.getPdfObject(i); final byte referenceBytes[] = getContentBytesFromContentObject( PdfReader.getPdfObject(reference)); final Iterator<Integer> iteratorKeys = this.certificates.keySet().iterator(); do { if (!iteratorKeys.hasNext()) { break; } final int index = iteratorKeys.next().intValue(); if (Arrays.equals(referenceBytes, this.certificates.get(Integer.valueOf(index)))) { certId[i] = index; } } while (true); } } else { certId = new int[0]; } arrayOcsps = vriEntry.getAsArray(new PdfName(PDF_NAME_OCSP)); int ocspId[]; if (arrayOcsps != null) { ocspId = new int[arrayOcsps.size()]; i = 0; for (final Iterator<PdfObject> iteratorOcsps = arrayOcsps.listIterator(); iteratorOcsps .hasNext();) { final PdfIndirectReference reference = (PdfIndirectReference) iteratorOcsps.next(); final byte referenceBytes[] = getContentBytesFromContentObject( PdfReader.getPdfObject(reference)); final Iterator<Integer> iteratorKeys = this.ocsps.keySet().iterator(); do { if (!iteratorKeys.hasNext()) { break; } final int index = iteratorKeys.next().intValue(); if (Arrays.equals(referenceBytes, this.ocsps.get(Integer.valueOf(index)))) { ocspId[i] = index; } } while (true); i++; } } else { ocspId = new int[0]; } arrayCrls = vriEntry.getAsArray(new PdfName(PDF_NAME_CRL)); int crlId[]; if (arrayCrls != null) { crlId = new int[arrayCrls.size()]; i = 0; for (final Iterator<PdfObject> iteratorCRLs = arrayCrls.listIterator(); iteratorCRLs .hasNext();) { final PdfIndirectReference reference = (PdfIndirectReference) iteratorCRLs.next(); final byte referenceBytes[] = getContentBytesFromContentObject( PdfReader.getPdfObject(reference)); final Iterator<Integer> iteratorKeys = this.crls.keySet().iterator(); do { if (!iteratorKeys.hasNext()) { break; } final int index = iteratorKeys.next().intValue(); if (Arrays.equals(referenceBytes, this.crls.get(Integer.valueOf(index)))) { crlId[i] = index; } } while (true); i++; } } else { crlId = new int[0]; } Calendar date = null; if 
(vriEntry.get(PdfName.TU) != null) { if (vriEntry.get(PdfName.TU) instanceof PdfDate) { date = PdfDate.decode(((PdfDate) vriEntry.get(PdfName.TU)).getEncoding()); } if (vriEntry.get(PdfName.TU) instanceof PdfString) { date = PdfDate.decode(vriEntry.getAsString(PdfName.TU).getEncoding()); } } val = new ValidationInformation(key, certId, ocspId, crlId, date); } } }
From source file:eu.europa.cedefop.europass.jtool.util.ExtractAttachments.java
License:EUPL
/**
 * Extracts the file attachments of the PDF read from {@code in}.
 *
 * <p>Two places are searched: the catalog's {@code /Names /EmbeddedFiles}
 * name tree, and the {@code /FileAttachment} annotations of every page.
 * Each file specification found is handed to {@link #unpackFile}. Errors
 * raised while reading are logged and otherwise ignored; the reader is
 * closed in every case.
 *
 * @throws Exception if no attachment was found in either place (including
 *                   the case where reading failed before one was found)
 */
public void execute() throws Exception {
    boolean hasAttachment = false;
    PdfReader reader = null;
    try {
        reader = new PdfReader(in);
        PdfDictionary catalog = reader.getCatalog();

        // Document-level attachments: /Names -> /EmbeddedFiles name tree
        PdfDictionary names = (PdfDictionary) PdfReader.getPdfObject(catalog.get(PdfName.NAMES));
        if (names != null) {
            PdfDictionary embFiles = (PdfDictionary) PdfReader
                    .getPdfObject(names.get(new PdfName("EmbeddedFiles")));
            if (embFiles != null) {
                HashMap embMap = PdfNameTree.readTree(embFiles);
                for (Iterator i = embMap.values().iterator(); i.hasNext();) {
                    PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
                    // Embedded files count as attachments too; without this
                    // the method reported "no attachment" after unpacking one.
                    hasAttachment = true;
                    unpackFile(filespec);
                }
            }
        }

        // Page-level attachments: /FileAttachment annotations
        for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
            PdfArray annots = (PdfArray) PdfReader.getPdfObject(reader.getPageN(k).get(PdfName.ANNOTS));
            if (annots == null) {
                continue;
            }
            for (Iterator i = annots.listIterator(); i.hasNext();) {
                PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
                PdfName subType = (PdfName) PdfReader.getPdfObject(annot.get(PdfName.SUBTYPE));
                if (!PdfName.FILEATTACHMENT.equals(subType)) {
                    continue;
                }
                PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject(annot.get(PdfName.FS));
                hasAttachment = true;
                unpackFile(filespec);
            }
        }
    } catch (Exception e) {
        // Best effort: a failure is logged and then reported through the
        // "no attachment" exception below when nothing was found before it.
        log.error("Error while extracting PDF attachements: " + e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    if (!hasAttachment) {
        throw new Exception("PDF file does not have attachment.");
    }
}
From source file:eu.europa.cedefop.europass.jtool.util.ExtractAttachments.java
License:EUPL
/**
 * Writes the embedded file described by a file specification to
 * {@code trgfile}.
 *
 * <p>Nothing is written when {@code filespec} is {@code null}, when its
 * {@code /Type} is neither {@code /F} nor {@code /Filespec}, or when its
 * {@code /EF} dictionary, its {@code /F} file name or the {@code /F} stream
 * inside {@code /EF} is missing.
 *
 * @param filespec the file specification dictionary; may be {@code null}
 * @throws IOException if the stream cannot be read or the target file cannot
 *                     be written
 */
public void unpackFile(PdfDictionary filespec) throws IOException {
    if (filespec == null) {
        return;
    }
    PdfName type = (PdfName) PdfReader.getPdfObject(filespec.get(PdfName.TYPE));
    if (!PdfName.F.equals(type) && !PdfName.FILESPEC.equals(type)) {
        return;
    }
    PdfDictionary ef = (PdfDictionary) PdfReader.getPdfObject(filespec.get(PdfName.EF));
    if (ef == null) {
        return;
    }
    // The file name is only checked for presence; the output always goes to trgfile.
    PdfString fn = (PdfString) PdfReader.getPdfObject(filespec.get(PdfName.F));
    if (fn == null) {
        return;
    }
    PRStream prs = (PRStream) PdfReader.getPdfObject(ef.get(PdfName.F));
    if (prs == null) {
        return;
    }

    byte b[] = PdfReader.getStreamBytes(prs);
    FileOutputStream fout = new FileOutputStream(trgfile);
    try {
        fout.write(b);
    } finally {
        // Close even when write() fails so the file handle is not leaked.
        fout.close();
    }
}
From source file:eu.europa.ec.markt.dss.validation.pades.PDFDocumentValidator.java
License:Open Source License
@Override public List<AdvancedSignature> getSignatures() { final List<AdvancedSignature> list = new ArrayList<AdvancedSignature>(); try {/*from w ww . jav a 2 s.com*/ PDFSignatureService pdfSignatureService = new ITextPDFSignatureService(); pdfSignatureService.validateSignatures(this.document.openStream(), new SignatureValidationCallback() { @Override public void validate(PdfReader reader, PdfDictionary outerCatalog, X509Certificate arg0, Date arg1, Certificate[] arg2, PdfDictionary signatureDictionary, PdfPKCS7 pk) { if (arg0 == null) { throw new NotETSICompliantException(MSG.NO_SIGNING_CERTIFICATE); } if (arg1 == null) { // throw new NotETSICompliantException(MSG.NO_SIGNING_TIME); } try { if (!signatureDictionary.get(new PdfName("Type")).equals(new PdfName("DocTimeStamp"))) { list.add(new PAdESSignature(reader, outerCatalog, signatureDictionary, pk)); } } catch (CMSException ex) { throw new RuntimeException(ex); } } }); } catch (SignatureException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } return list; }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private Bookmark getBookmark() { PdfDictionary catalog = reader.getCatalog(); PdfObject obj = PdfReader.getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); if (obj == null || !obj.isDictionary()) { return null; }/*from w w w .j a v a 2 s . co m*/ PdfDictionary outlines = (PdfDictionary) obj; // pages = new IntHashtable(); // int numPages = reader.getNumberOfPages(); // for (int k = 1; k <= numPages; ++k) { // pages.put(reader.getPageOrigRef(k).getNumber(), k); // reader.releasePage(k); // } initPages(); Bookmark root = new Bookmark(); root.setTitle("Root Bookmark"); bookmarkDepth(root, (PdfDictionary) PdfReader.getPdfObjectRelease(outlines.get(PdfName.FIRST))); // bookmarkDepthIterative(root, outlines); return root; }