List of usage examples for com.lowagie.text.pdf PdfObject isDictionary
public boolean isDictionary()
Checks whether this PdfObject is of the type PdfDictionary.
From source file:com.cyberway.issue.crawler.extractor.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs// w w w . j ava 2 s .c o m * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) { this.extractURIs((PdfObject) objectList.next()); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:crawler.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*from w ww .jav a 2 s. c om*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) foundURIs.add(value.toString()); else this.extractURIs(value); } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) this.extractURIs((PdfObject) objectList.next()); // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) return; // note that we've seen it if it's new else markAsSeen(indirect.getGeneration(), indirect.getNumber()); // dereference the "pointer" and process the object indirect.getReader(); PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License:LGPL
@SuppressWarnings("unchecked") public java.util.List getBookmarkTitlesAsText() { java.util.List bookmarkContent = null; if (pdfReader != null) { //bookmarkContent = SimpleBookmark.getBookmark(pdfReader); PdfDictionary catalog = pdfReader.getCatalog(); if (catalog != null) { PdfObject rootPdfObject = PdfReader.getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); if (rootPdfObject != null && rootPdfObject.isDictionary()) { PdfDictionary rootOutlinesPdfDictionary = (PdfDictionary) rootPdfObject; /*//from w ww. j av a 2 s . c o m * If it doesn't exist create the List and populate it, * otherwise just return the already existing List. */ if (bookmarkTextList == null) { bookmarkTextList = new ArrayList<String>(); // Populate the List populateBookmarkTextList(rootOutlinesPdfDictionary, ""); } // end if } } // end if } return bookmarkContent; }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private Bookmark getBookmark() { PdfDictionary catalog = reader.getCatalog(); PdfObject obj = PdfReader.getPdfObjectRelease(catalog.get(PdfName.OUTLINES)); if (obj == null || !obj.isDictionary()) { return null; }// w ww . ja v a 2 s .c o m PdfDictionary outlines = (PdfDictionary) obj; // pages = new IntHashtable(); // int numPages = reader.getNumberOfPages(); // for (int k = 1; k <= numPages; ++k) { // pages.put(reader.getPageOrigRef(k).getNumber(), k); // reader.releasePage(k); // } initPages(); Bookmark root = new Bookmark(); root.setTitle("Root Bookmark"); bookmarkDepth(root, (PdfDictionary) PdfReader.getPdfObjectRelease(outlines.get(PdfName.FIRST))); // bookmarkDepthIterative(root, outlines); return root; }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void setActionInBookmark(Bookmark bookmark, PdfDictionary action) { PdfObject dest;/*w w w . j a v a2s . c o m*/ if (PdfName.GOTO.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { mapGotoBookmark(bookmark, dest); } } else if (PdfName.URI.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Uri); bookmark.setUri(((PdfString) PdfReader.getPdfObjectRelease(action.get(PdfName.URI))).toUnicodeString()); } else if (PdfName.GOTOR.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setRemoteDestination(true); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file != null) { if (file.isString()) { String path = Ut.onWindowsReplaceBackslashWithSlash(((PdfString) file).toUnicodeString()); bookmark.setRemoteFilePath(path); } else if (file.isDictionary()) { file = PdfReader.getPdfObject(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setRemoteFilePath(((PdfString) file).toUnicodeString()); } } } dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { if (dest.isString()) { bookmark.setNamedDestination(dest.toString()); } else if (dest.isName()) { bookmark.setNamedDestination(PdfName.decodeName(dest.toString())); bookmark.setNamedAsName(true); } else if (dest.isArray()) { PdfArray arr = (PdfArray) dest; PdfReader remoteReader; try { // File remoteFile = new File(bookmark.getRemoteFilePath()); // if (!remoteFile.isAbsolute()) { // File openedFile = new File(filePath); // String containingFolder = openedFile.getParent(); // String remotePath = containingFolder + File.separator + bookmark.getRemoteFilePath(); // remoteFile = new File(remotePath); // } File remoteFile = Ut.createAbsolutePath(new File(filePath), new File(bookmark.getRemoteFilePath())); remoteReader = new PdfReader(remoteFile.getCanonicalPath()); makeBookmarkParam(remoteReader, 
bookmark, arr, null); remoteReader.close(); } catch (IOException ex) { //System.out.println(ex.getMessage()); } finally { } } } PdfObject newWindow = PdfReader.getPdfObjectRelease(action.get(PdfName.NEWWINDOW)); if (newWindow != null) { bookmark.setNewWindow(((PdfBoolean) newWindow).booleanValue()); } } else if (PdfName.LAUNCH.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Launch); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file == null) { file = PdfReader.getPdfObjectRelease(action.get(PdfName.WIN)); } if (file != null) { if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } else if (file.isDictionary()) { file = PdfReader.getPdfObjectRelease(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } } } } else if (PdfName.HIDE.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Hide); PdfObject annotation = PdfReader.getPdfObjectRelease(action.get(PdfName.T)); if (annotation != null) { if (annotation.isDictionary()) { } else if (annotation.isArray()) { } else if (annotation.isString()) { bookmark.setFieldNameToHide(((PdfString) annotation).toUnicodeString()); } } PdfBoolean hide = (PdfBoolean) PdfReader.getPdfObjectRelease(action.get(PdfName.H)); if (hide != null) { bookmark.setHide(hide.booleanValue()); } } else { bookmark.setType(BookmarkType.Unknown); } }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void setActionsRecursive(Bookmark bookmark, PdfDictionary action) { setActionInBookmark(bookmark, action); PdfObject next = PdfReader.getPdfObjectRelease(action.get(PdfName.NEXT)); if (next != null) { if (next.isArray()) { PdfArray actions = (PdfArray) next; for (int i = 0; i < actions.size(); i++) { Bookmark b = new Bookmark(); action = actions.getAsDict(i); setActionsRecursive(b, action); bookmark.addChainedBookmark(b); }//w w w . jav a 2 s. c om } else if (next.isDictionary()) { Bookmark b = new Bookmark(); action = (PdfDictionary) next; setActionsRecursive(b, action); bookmark.addChainedBookmark(b); } } }
From source file:org.archive.modules.extractor.PDFParser.java
License:Apache License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*ww w .ja v a2 s . c om*/ * @param entity */ @SuppressWarnings("unchecked") protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; for (PdfObject pdfObject : (Iterable<PdfObject>) array.getArrayList()) { this.extractURIs(pdfObject); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }