List of usage examples for com.lowagie.text.pdf PdfObject isArray
public boolean isArray()
Checks if this PdfObject is of the type PdfArray.
From source file:com.cyberway.issue.crawler.extractor.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*w w w . j a v a2 s . c o m*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) { this.extractURIs((PdfObject) objectList.next()); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:crawler.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*from w w w . j a v a 2 s. c om*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) foundURIs.add(value.toString()); else this.extractURIs(value); } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) this.extractURIs((PdfObject) objectList.next()); // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) return; // note that we've seen it if it's new else markAsSeen(indirect.getGeneration(), indirect.getNumber()); // dereference the "pointer" and process the object indirect.getReader(); PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void setActionInBookmark(Bookmark bookmark, PdfDictionary action) { PdfObject dest; if (PdfName.GOTO.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { mapGotoBookmark(bookmark, dest); }/*w w w .ja v a2 s . c om*/ } else if (PdfName.URI.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Uri); bookmark.setUri(((PdfString) PdfReader.getPdfObjectRelease(action.get(PdfName.URI))).toUnicodeString()); } else if (PdfName.GOTOR.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setRemoteDestination(true); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file != null) { if (file.isString()) { String path = Ut.onWindowsReplaceBackslashWithSlash(((PdfString) file).toUnicodeString()); bookmark.setRemoteFilePath(path); } else if (file.isDictionary()) { file = PdfReader.getPdfObject(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setRemoteFilePath(((PdfString) file).toUnicodeString()); } } } dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { if (dest.isString()) { bookmark.setNamedDestination(dest.toString()); } else if (dest.isName()) { bookmark.setNamedDestination(PdfName.decodeName(dest.toString())); bookmark.setNamedAsName(true); } else if (dest.isArray()) { PdfArray arr = (PdfArray) dest; PdfReader remoteReader; try { // File remoteFile = new File(bookmark.getRemoteFilePath()); // if (!remoteFile.isAbsolute()) { // File openedFile = new File(filePath); // String containingFolder = openedFile.getParent(); // String remotePath = containingFolder + File.separator + bookmark.getRemoteFilePath(); // remoteFile = new File(remotePath); // } File remoteFile = Ut.createAbsolutePath(new File(filePath), new File(bookmark.getRemoteFilePath())); remoteReader = new PdfReader(remoteFile.getCanonicalPath()); makeBookmarkParam(remoteReader, 
bookmark, arr, null); remoteReader.close(); } catch (IOException ex) { //System.out.println(ex.getMessage()); } finally { } } } PdfObject newWindow = PdfReader.getPdfObjectRelease(action.get(PdfName.NEWWINDOW)); if (newWindow != null) { bookmark.setNewWindow(((PdfBoolean) newWindow).booleanValue()); } } else if (PdfName.LAUNCH.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Launch); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file == null) { file = PdfReader.getPdfObjectRelease(action.get(PdfName.WIN)); } if (file != null) { if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } else if (file.isDictionary()) { file = PdfReader.getPdfObjectRelease(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } } } } else if (PdfName.HIDE.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Hide); PdfObject annotation = PdfReader.getPdfObjectRelease(action.get(PdfName.T)); if (annotation != null) { if (annotation.isDictionary()) { } else if (annotation.isArray()) { } else if (annotation.isString()) { bookmark.setFieldNameToHide(((PdfString) annotation).toUnicodeString()); } } PdfBoolean hide = (PdfBoolean) PdfReader.getPdfObjectRelease(action.get(PdfName.H)); if (hide != null) { bookmark.setHide(hide.booleanValue()); } } else { bookmark.setType(BookmarkType.Unknown); } }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void setActionsRecursive(Bookmark bookmark, PdfDictionary action) { setActionInBookmark(bookmark, action); PdfObject next = PdfReader.getPdfObjectRelease(action.get(PdfName.NEXT)); if (next != null) { if (next.isArray()) { PdfArray actions = (PdfArray) next; for (int i = 0; i < actions.size(); i++) { Bookmark b = new Bookmark(); action = actions.getAsDict(i); setActionsRecursive(b, action); bookmark.addChainedBookmark(b); }// ww w .j a v a2 s . co m } else if (next.isDictionary()) { Bookmark b = new Bookmark(); action = (PdfDictionary) next; setActionsRecursive(b, action); bookmark.addChainedBookmark(b); } } }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void mapGotoBookmark(Bookmark bookmark, PdfObject dest) { if (dest.isString()) { if (namesAsString == null) { namesAsString = reader.getNamedDestinationFromStrings(); }// w ww. j a va2 s . c om bookmark.setType(BookmarkType.Named); bookmark.setNamedDestination(dest.toString()); PdfArray namedDest = getNamedDestination(reader, dest.toString(), false); Bookmark namedTarget = new Bookmark(); namedTarget.setTitle(dest.toString()); makeBookmarkParam(reader, namedTarget, (PdfArray) namedDest, pages); bookmark.setNamedTarget(namedTarget); } else if (dest.isName()) { if (namesAsName == null) { namesAsName = reader.getNamedDestinationFromNames(); } bookmark.setType(BookmarkType.Named); String name = PdfName.decodeName(dest.toString()); bookmark.setNamedDestination(name); bookmark.setNamedAsName(true); PdfArray namedDest = getNamedDestination(reader, name, true); Bookmark namedTarget = new Bookmark(); namedTarget.setTitle(name); makeBookmarkParam(reader, namedTarget, (PdfArray) namedDest, pages); bookmark.setNamedTarget(namedTarget); } else if (dest.isArray()) { makeBookmarkParam(reader, bookmark, (PdfArray) dest, pages); } }
From source file:org.archive.modules.extractor.PDFParser.java
License:Apache License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs//from ww w . j a v a2 s .c o m * @param entity */ @SuppressWarnings("unchecked") protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; for (PdfObject pdfObject : (Iterable<PdfObject>) array.getArrayList()) { this.extractURIs(pdfObject); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }