List of usage examples for com.lowagie.text.pdf PdfObject toString
public String toString()
Returns the String-representation of this PdfObject.
From source file:com.cyberway.issue.crawler.extractor.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/*from ww w . ja va2 s . c om*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) { this.extractURIs((PdfObject) objectList.next()); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:crawler.PDFParser.java
License:Open Source License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs/* w ww . j a v a 2s .co m*/ * @param entity */ protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; @SuppressWarnings("unchecked") Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) foundURIs.add(value.toString()); else this.extractURIs(value); } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; ArrayList arrayObjects = array.getArrayList(); Iterator objectList = arrayObjects.iterator(); while (objectList.hasNext()) this.extractURIs((PdfObject) objectList.next()); // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) return; // note that we've seen it if it's new else markAsSeen(indirect.getGeneration(), indirect.getNumber()); // dereference the "pointer" and process the object indirect.getReader(); PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License:LGPL
/** * @param fullExportDirectoryPath//from www . j a v a 2s . co m * @param pdfObjectCounter * @param pdfObject * @throws IOException * @throws Exception * @throws FileNotFoundException */ private void extractImageFromPdfObject(String fullExportDirectoryPath, int pdfObjectCounter, PdfObject pdfObject) throws IOException, Exception, FileNotFoundException { boolean rawByteArray = false; if (pdfObject != null) { if (pdfObject.isStream()) { PdfStream pdfStream = (PdfStream) pdfObject; PdfObject pdfObjectSubType = pdfStream.get(PdfName.SUBTYPE); // Check PDF subtype and make sure it's an Image type if (pdfObjectSubType != null && pdfObjectSubType.toString().equals(PdfName.IMAGE.toString())) { // Now we have a PDF stream object with an image but what is that exactly? byte[] byteArrayImage = null; /* * DCTDecode isn't supported by iText2. * The image can be treated as JPEG (we have already * verified it's an image): * http://www.mail-archive.com/itext-questions@lists.sourceforge.net/msg48307.html * * Check what kind of decoding has to be applied...and * get the byte array containing the image. */ if ((pdfStream.get(PdfName.FILTER)).toString().equals(PdfName.DCTDECODE.toString())) { // Get the RAW byte array byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream); rawByteArray = true; } else { /* * PdfReader.getStreamBytes(PRStream) should * automatically apply all decoding filters. 
* @see com.lowagie.text.pdf.PdfReader#getStreamBytes(PRStream) */ byteArrayImage = PdfReader.getStreamBytes((PRStream) pdfStream); rawByteArray = false; } /* // Test PdfImage - START logger.trace(""); logger.trace(""); if (pdfStream instanceof PdfImage) { PdfImage pdfImage = (PdfImage) pdfStream; logger.trace(""); logger.trace("Output for pdfImage object..."); logger.trace("pdfImage --> pdfName --> Id..............: " + pdfImage.get(PdfName.ID)); logger.trace("pdfImage --> pdfName --> Image...........: " + pdfImage.get(PdfName.IMAGE)); logger.trace("pdfImage --> pdfName --> ImageB..........: " + pdfImage.get(PdfName.IMAGEB)); logger.trace("pdfImage --> pdfName --> ImageC..........: " + pdfImage.get(PdfName.IMAGEC)); logger.trace("pdfImage --> pdfName --> ImageI..........: " + pdfImage.get(PdfName.IMAGEI)); logger.trace("pdfImage --> pdfName --> Imagemask.......: " + pdfImage.get(PdfName.IMAGEMASK)); logger.trace("pdfImage --> pdfName --> Info............: " + pdfImage.get(PdfName.INFO)); logger.trace("pdfImage --> pdfName --> Name............: " + pdfImage.get(PdfName.NAME)); logger.trace("pdfImage --> pdfName --> Named...........: " + pdfImage.get(PdfName.NAMED)); } else { logger.trace("pdfStream is NO instanceof PdfImage"); } // STREAM logger.trace(""); logger.trace("Output for pdfImage object..."); logger.trace("pdfObject.toString()).....................: " + pdfObject.toString()); logger.trace("pdfObjectCounter..........................: " + pdfObjectCounter); logger.trace("pdfStream --> pdfName --> Page............: " + pdfStream. 
get(PdfName.PAGE)); logger.trace("pdfObject.getIndRef().getNumber().........: " + (pdfObject.getIndRef()!=null?pdfObject.getIndRef().toString():"null")); logger.trace("pdfStream.getIndRef().getNumber().........: " + (pdfStream.getIndRef()!=null?pdfStream.getIndRef().toString():"null")); logger.trace("pdfStream --> pdfName --> toString........: " + pdfStream.toString()); logger.trace("pdfStream --> pdfName --> Width...........: " + pdfStream.get(PdfName.WIDTH)); logger.trace("pdfStream --> pdfName --> Height..........: " + pdfStream.get(PdfName.HEIGHT)); logger.trace("pdfStream --> pdfName --> BitsPerComponent: " + pdfStream.get(PdfName.BITSPERCOMPONENT)); logger.trace("pdfStream --> pdfName --> BitsPerSample...: " + pdfStream.get(PdfName.BITSPERSAMPLE)); logger.trace("pdfStream --> pdfName --> ColorSpace......: " + pdfStream.get(PdfName.COLORSPACE)); logger.trace("pdfStream --> pdfName --> Filter..........: " + pdfStream.get(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Filter - as name: " + pdfStream.getAsName(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Name............: " + pdfStream.get(PdfName.NAME)); logger.trace("pdfStream --> pdfName --> SubType.........: " + pdfStream.get(PdfName.SUBTYPE)); */ // Extract the image name String streamImageName = (pdfStream.get(PdfName.NAME) == null ? null : pdfStream.get(PdfName.NAME).toString()); if (streamImageName != null && streamImageName.length() > 1 && streamImageName.startsWith("/")) { streamImageName = streamImageName.substring(1); } else { streamImageName = null; } // end if..else String exportFileWithoutExtension = (fullExportDirectoryPath != null ? fullExportDirectoryPath : this.fullPDFDirectoryPath) + GlobalTools.getFileNameWithoutExtension(this.fullPDFFilePath) + "_(" + "p000" + "_ref" + REF_NUMBER_FORMAT.format(pdfObjectCounter) + (streamImageName == null ? 
"_unk" : "_" + streamImageName) + ")"; // Test FileOutputStream fileOutputStream = null; if (rawByteArray) { fileOutputStream = new FileOutputStream(exportFileWithoutExtension + ".jpg"); fileOutputStream.write(byteArrayImage); fileOutputStream.flush(); fileOutputStream.close(); fileOutputStream = null; } else { /* * Check image details */ int pdfImageWidth = -1; int pdfImageHeight = -1; int pdfImageBitsPerComponent = -1; try { if (pdfStream.get(PdfName.BITSPERCOMPONENT).isNumber()) { pdfImageBitsPerComponent = new Integer( pdfStream.get(PdfName.BITSPERCOMPONENT).toString()).intValue(); } } catch (NumberFormatException ex) { logger.error("A NumberFormatException occurred " + "converting BITSPERCOMPONENT (w=" + pdfImageWidth + "; h=" + pdfImageHeight + "; BitsPerComponent=" + pdfImageBitsPerComponent + ".", ex); } try { if (pdfStream.get(PdfName.HEIGHT).isNumber()) { pdfImageHeight = new Integer(pdfStream.get(PdfName.HEIGHT).toString()).intValue(); } } catch (NumberFormatException ex) { logger.error("A NumberFormatException occurred " + "converting HEIGHT (w=" + pdfImageWidth + "; h=" + pdfImageHeight + "; BitsPerComponent=" + pdfImageBitsPerComponent + ".", ex); } try { if (pdfStream.get(PdfName.WIDTH).isNumber()) { pdfImageWidth = new Integer(pdfStream.get(PdfName.WIDTH).toString()).intValue(); } } catch (NumberFormatException ex) { logger.error("A NumberFormatException occurred " + "converting WIDTH (w=" + pdfImageWidth + "; h=" + pdfImageHeight + "; BitsPerComponent=" + pdfImageBitsPerComponent + ".", ex); } if (PdfName.DEVICERGB.equals(pdfStream.get(PdfName.COLORSPACE)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0 && pdfImageHeight > 0) { BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage(byteArrayImage, pdfImageWidth, pdfImageHeight, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write(bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } else if 
(PdfName.DEVICEGRAY.equals(pdfStream.get(PdfName.COLORSPACE)) && PdfName.RUNLENGTHDECODE.equals(pdfStream.get(PdfName.FILTER)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0) { BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage( ImageProcessingTools.runLengthDecode(byteArrayImage), pdfImageWidth, 2233, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write(bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } else { logger.error("Unsupported Image format or missing information to convert the image."); } // end if..else } } // end if } // end if } // end if }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFiText4Extractor.java
License:LGPL
/** * <p>/*from w ww. j a v a 2s . com*/ * This method searches for all image objects from the currently processed * PDF file and stores them as PDF in the given export directory or in the * same directory where the original PDF file is stored. * </p> * <p> * The filename of the images is build based on the original PDF filename * (without extension) and additional details like page number, image * number and if available the internal image name. * </p> * @param fullExportDirectoryPath The optional full export path where the images * should be stored. If not given, the location of the original PDF file is used. * @throws Exception */ private void imageExtractor(String fullExportDirectoryPath) throws Exception { if (fullExportDirectoryPath != null) { fullExportDirectoryPath = GlobalTools.checkDirectoryPath(fullExportDirectoryPath); File exportDirectory = new File(fullExportDirectoryPath); if (!exportDirectory.exists()) { exportDirectory.mkdirs(); } // end if } // end if int totalNumberOfPDFObjects = pdfReader.getXrefSize(); for (int pdfObjectCounter = 0; pdfObjectCounter < totalNumberOfPDFObjects; pdfObjectCounter++) { PdfObject pdfObject = pdfReader.getPdfObject(pdfObjectCounter); if (pdfObject != null) { if (pdfObject.isStream()) { PdfStream pdfStream = (PdfStream) pdfObject; PdfObject pdfObjectSubType = pdfStream.get(PdfName.SUBTYPE); if (pdfObject == null) { logger.debug("The internal PDF object is null."); } // end if if (!pdfObject.isStream()) { logger.debug("The internal PDF object is not representing a stream object."); } // end if // Check PDF subtype and make sure it's an Image type if (pdfObjectSubType != null && pdfObjectSubType.toString().equals(PdfName.IMAGE.toString())) { // Now we have a PDF stream object with an image but what is that exactly? 
//byte[] byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream); byte[] byteArrayImage = null; if (PdfName.FLATEDECODE.equals(pdfStream.getAsName(PdfName.FILTER))) { byteArrayImage = PdfReader.getStreamBytes((PRStream) pdfStream); // else if other filter (not supported yet) } else { byteArrayImage = PdfReader.getStreamBytesRaw((PRStream) pdfStream); } // end if..else // Test PdfImage - START /* PdfImage pdfImage = (PdfImage) ((PdfStream)((PRStream)pdfStream)); logger.trace("pdfImage --> pdfName --> Width...........: " + pdfStream.get(PdfName.WIDTH)); logger.trace("pdfImage --> pdfName --> Height..........: " + pdfStream.get(PdfName.HEIGHT)); logger.trace("pdfImage --> pdfName --> Id..............: " + pdfImage.get(PdfName.ID)); logger.trace("pdfImage --> pdfName --> Image...........: " + pdfImage.get(PdfName.IMAGE)); logger.trace("pdfImage --> pdfName --> ImageB..........: " + pdfImage.get(PdfName.IMAGEB)); logger.trace("pdfImage --> pdfName --> ImageC..........: " + pdfImage.get(PdfName.IMAGEC)); logger.trace("pdfImage --> pdfName --> ImageI..........: " + pdfImage.get(PdfName.IMAGEI)); logger.trace("pdfImage --> pdfName --> Imagemask.......: " + pdfImage.get(PdfName.IMAGEMASK)); logger.trace("pdfImage --> pdfName --> Info............: " + pdfImage.get(PdfName.INFO)); logger.trace("pdfImage --> pdfName --> Name............: " + pdfImage.get(PdfName.NAME)); logger.trace("pdfImage --> pdfName --> Named...........: " + pdfImage.get(PdfName.NAMED)); logger.trace("pdfImage --> pdfName --> Page............: " + pdfStream.get(PdfName.PAGE)); */ // Test PdfImage - STOP // STREAM /* logger.trace("pdfObject.toString()): " + pdfObject.toString()); logger.trace("pdfObjectCounter.................: " + pdfObjectCounter); logger.trace("pdfObject.getIndRef().getNumber(): " + (pdfObject.getIndRef()!=null?pdfObject.getIndRef().toString():"null")); logger.trace("pdfStream.getIndRef().getNumber(): " + (pdfStream.getIndRef()!=null?pdfStream.getIndRef().toString():"null")); 
logger.trace("pdfStream --> pdfName --> toString........: " + pdfStream.toString()); logger.trace("pdfStream --> pdfName --> Width...........: " + pdfStream.get(PdfName.WIDTH)); logger.trace("pdfStream --> pdfName --> Height..........: " + pdfStream.get(PdfName.HEIGHT)); logger.trace("pdfStream --> pdfName --> BitsPerComponent: " + pdfStream.get(PdfName.BITSPERCOMPONENT)); logger.trace("pdfStream --> pdfName --> BitsPerSample...: " + pdfStream.get(PdfName.BITSPERSAMPLE)); logger.trace("pdfStream --> pdfName --> ColorSpace......: " + pdfStream.get(PdfName.COLORSPACE)); logger.trace("pdfStream --> pdfName --> CCITTFaxDecode..: " + pdfStream.get(PdfName.CCITTFAXDECODE)); logger.trace("pdfStream --> pdfName --> Document........: " + pdfStream.get(PdfName.DOCUMENT)); logger.trace("pdfStream --> pdfName --> Decode..........: " + pdfStream.get(PdfName.DECODE)); logger.trace("pdfStream --> pdfName --> DecodeParms.....: " + pdfStream.get(PdfName.DECODEPARMS)); logger.trace("pdfStream --> pdfName --> DeviceGray......: " + pdfStream.get(PdfName.DEVICEGRAY)); logger.trace("pdfStream --> pdfName --> DeviceCMYK......: " + pdfStream.get(PdfName.DEVICECMYK)); logger.trace("pdfStream --> pdfName --> DeviceRGB.......: " + pdfStream.get(PdfName.DEVICERGB)); logger.trace("pdfStream --> pdfName --> Filter..........: " + pdfStream.get(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Filter - as name: " + pdfStream.getAsName(PdfName.FILTER)); logger.trace("pdfStream --> pdfName --> Id..............: " + pdfStream.get(PdfName.ID)); logger.trace("pdfStream --> pdfName --> Image...........: " + pdfStream.get(PdfName.IMAGE)); logger.trace("pdfStream --> pdfName --> ImageB..........: " + pdfStream.get(PdfName.IMAGEB)); logger.trace("pdfStream --> pdfName --> ImageC..........: " + pdfStream.get(PdfName.IMAGEC)); logger.trace("pdfStream --> pdfName --> ImageI..........: " + pdfStream.get(PdfName.IMAGEI)); logger.trace("pdfStream --> pdfName --> Imagemask.......: " + 
pdfStream.get(PdfName.IMAGEMASK)); logger.trace("pdfStream --> pdfName --> Info............: " + pdfStream.get(PdfName.INFO)); logger.trace("pdfStream --> pdfName --> Name............: " + pdfStream.get(PdfName.NAME)); logger.trace("pdfStream --> pdfName --> Named...........: " + pdfStream.get(PdfName.NAMED)); logger.trace("pdfStream --> pdfName --> Named - as name.: " + pdfStream.getAsName(PdfName.NAMED)); logger.trace("pdfStream --> pdfName --> Page............: " + pdfStream.get(PdfName.PAGE)); logger.trace("pdfStream --> pdfName --> PageElement.....: " + pdfStream.get(PdfName.PAGEELEMENT)); logger.trace("pdfStream --> pdfName --> Pdf.............: " + pdfStream.get(PdfName.PDF)); logger.trace("pdfStream --> pdfName --> PdfDocEncoding..: " + pdfStream.get(PdfName.PDFDOCENCODING)); logger.trace("pdfStream --> pdfName --> Position........: " + pdfStream.get(PdfName.POSITION)); logger.trace("pdfStream --> pdfName --> Producer........: " + pdfStream.get(PdfName.PRODUCER)); logger.trace("pdfStream --> pdfName --> Properties......: " + pdfStream.get(PdfName.PROPERTIES)); logger.trace("pdfStream --> pdfName --> Sect............: " + pdfStream.get(PdfName.SECT)); logger.trace("pdfStream --> pdfName --> SubType.........: " + pdfStream.get(PdfName.SUBTYPE)); logger.trace("pdfStream --> pdfName --> Supplement......: " + pdfStream.get(PdfName.SUPPLEMENT)); logger.trace("pdfStream --> pdfName --> Title...........: " + pdfStream.get(PdfName.TITLE)); */ // logger.trace("pdfImage --> pdfName --> : " + (pdfName.).toString()); // Extract the image name String streamImageName = (pdfStream.get(PdfName.NAME) == null ? null : pdfStream.get(PdfName.NAME).toString()); if (streamImageName != null && streamImageName.length() > 1 && streamImageName.startsWith("/")) { streamImageName = streamImageName.substring(1); } else { streamImageName = null; } // end if..else String exportFileWithoutExtension = (fullExportDirectoryPath != null ? 
fullExportDirectoryPath : this.fullPDFDirectoryPath) + GlobalTools.getFileNameWithoutExtension(this.fullPDFFilePath) + "_(" + "p000" + "_ref" + REF_NUMBER_FORMAT.format(pdfObjectCounter) + (streamImageName == null ? "_unk" : "_" + streamImageName) + ")"; // Test FileOutputStream fileOutputStream = new FileOutputStream( exportFileWithoutExtension + ".jpg"); /* * Write given byte array to a file. */ fileOutputStream.write(byteArrayImage); fileOutputStream.flush(); fileOutputStream.close(); fileOutputStream = null; /* * Check image details */ int pdfImageBitsPerComponent = -1; try { if (pdfStream.get(PdfName.BITSPERCOMPONENT).isNumber()) { pdfImageBitsPerComponent = new Integer( pdfStream.get(PdfName.BITSPERCOMPONENT).toString()).intValue(); } } catch (NumberFormatException ex) { } int pdfImageHeight = -1; try { if (pdfStream.get(PdfName.HEIGHT).isNumber()) { pdfImageHeight = new Integer(pdfStream.get(PdfName.HEIGHT).toString()).intValue(); } } catch (NumberFormatException ex) { } int pdfImageWidth = -1; try { if (pdfStream.get(PdfName.WIDTH).isNumber()) { pdfImageWidth = new Integer(pdfStream.get(PdfName.WIDTH).toString()).intValue(); } } catch (NumberFormatException ex) { } logger.debug("Height..........:" + pdfImageHeight); logger.debug("Width...........:" + pdfImageWidth); logger.debug("BitsPerComponent:" + pdfImageBitsPerComponent); // or you could try making a java.awt.Image from the array: if (PdfName.DEVICERGB.equals(pdfStream.get(PdfName.COLORSPACE)) && PdfName.FLATEDECODE.equals(pdfStream.get(PdfName.FILTER)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0 && pdfImageHeight > 0) { BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage(byteArrayImage, pdfImageWidth, pdfImageHeight, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write(bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } else if (PdfName.DEVICEGRAY.equals(pdfStream.get(PdfName.COLORSPACE)) && 
PdfName.RUNLENGTHDECODE.equals(pdfStream.get(PdfName.FILTER)) && pdfImageBitsPerComponent > 0 && pdfImageWidth > 0) { BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage( ImageProcessingTools.runLengthDecode(byteArrayImage), pdfImageWidth, 2233, pdfImageBitsPerComponent); if (bufferedImage != null) { ImageIO.write(bufferedImage, "PNG", new FileOutputStream(exportFileWithoutExtension + "_imageIO" + ".png")); } // end if } // end if... /* Image image = Toolkit.getDefaultToolkit().createImage(imageByteArray); BufferedImage bufferedImage = ImageProcessingTools.toBufferedImage(image, pdfImageWidth, pdfImageHeight); if (bufferedImage != null) { System.out.println("Image-Height....:" + bufferedImage.getHeight()); System.out.println("Image-Width.....:" + bufferedImage.getWidth()); System.out.println("Image-isAlphaP..:" + bufferedImage.isAlphaPremultiplied()); File pngOutputFile = new File(exportFileWithoutExtension + "_imageIO.jpg"); ImageIO.write(bufferedImage, "jpg", pngOutputFile); } */ /**/ } // end if } // end if } // end if } // end for }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void setActionInBookmark(Bookmark bookmark, PdfDictionary action) { PdfObject dest; if (PdfName.GOTO.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { mapGotoBookmark(bookmark, dest); }/* w ww .j av a 2s. c o m*/ } else if (PdfName.URI.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Uri); bookmark.setUri(((PdfString) PdfReader.getPdfObjectRelease(action.get(PdfName.URI))).toUnicodeString()); } else if (PdfName.GOTOR.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setRemoteDestination(true); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file != null) { if (file.isString()) { String path = Ut.onWindowsReplaceBackslashWithSlash(((PdfString) file).toUnicodeString()); bookmark.setRemoteFilePath(path); } else if (file.isDictionary()) { file = PdfReader.getPdfObject(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setRemoteFilePath(((PdfString) file).toUnicodeString()); } } } dest = PdfReader.getPdfObjectRelease(action.get(PdfName.D)); if (dest != null) { if (dest.isString()) { bookmark.setNamedDestination(dest.toString()); } else if (dest.isName()) { bookmark.setNamedDestination(PdfName.decodeName(dest.toString())); bookmark.setNamedAsName(true); } else if (dest.isArray()) { PdfArray arr = (PdfArray) dest; PdfReader remoteReader; try { // File remoteFile = new File(bookmark.getRemoteFilePath()); // if (!remoteFile.isAbsolute()) { // File openedFile = new File(filePath); // String containingFolder = openedFile.getParent(); // String remotePath = containingFolder + File.separator + bookmark.getRemoteFilePath(); // remoteFile = new File(remotePath); // } File remoteFile = Ut.createAbsolutePath(new File(filePath), new File(bookmark.getRemoteFilePath())); remoteReader = new PdfReader(remoteFile.getCanonicalPath()); makeBookmarkParam(remoteReader, 
bookmark, arr, null); remoteReader.close(); } catch (IOException ex) { //System.out.println(ex.getMessage()); } finally { } } } PdfObject newWindow = PdfReader.getPdfObjectRelease(action.get(PdfName.NEWWINDOW)); if (newWindow != null) { bookmark.setNewWindow(((PdfBoolean) newWindow).booleanValue()); } } else if (PdfName.LAUNCH.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Launch); PdfObject file = PdfReader.getPdfObjectRelease(action.get(PdfName.F)); if (file == null) { file = PdfReader.getPdfObjectRelease(action.get(PdfName.WIN)); } if (file != null) { if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } else if (file.isDictionary()) { file = PdfReader.getPdfObjectRelease(((PdfDictionary) file).get(PdfName.F)); if (file.isString()) { bookmark.setFileToLaunch(((PdfString) file).toUnicodeString()); } } } } else if (PdfName.HIDE.equals(PdfReader.getPdfObjectRelease(action.get(PdfName.S)))) { bookmark.setType(BookmarkType.Hide); PdfObject annotation = PdfReader.getPdfObjectRelease(action.get(PdfName.T)); if (annotation != null) { if (annotation.isDictionary()) { } else if (annotation.isArray()) { } else if (annotation.isString()) { bookmark.setFieldNameToHide(((PdfString) annotation).toUnicodeString()); } } PdfBoolean hide = (PdfBoolean) PdfReader.getPdfObjectRelease(action.get(PdfName.H)); if (hide != null) { bookmark.setHide(hide.booleanValue()); } } else { bookmark.setType(BookmarkType.Unknown); } }
From source file:it.flavianopetrocchi.jpdfbookmarks.itextbookmarksconverter.iTextBookmarksConverter.java
License:Open Source License
private void mapGotoBookmark(Bookmark bookmark, PdfObject dest) { if (dest.isString()) { if (namesAsString == null) { namesAsString = reader.getNamedDestinationFromStrings(); }//from w w w . j a v a 2s .c om bookmark.setType(BookmarkType.Named); bookmark.setNamedDestination(dest.toString()); PdfArray namedDest = getNamedDestination(reader, dest.toString(), false); Bookmark namedTarget = new Bookmark(); namedTarget.setTitle(dest.toString()); makeBookmarkParam(reader, namedTarget, (PdfArray) namedDest, pages); bookmark.setNamedTarget(namedTarget); } else if (dest.isName()) { if (namesAsName == null) { namesAsName = reader.getNamedDestinationFromNames(); } bookmark.setType(BookmarkType.Named); String name = PdfName.decodeName(dest.toString()); bookmark.setNamedDestination(name); bookmark.setNamedAsName(true); PdfArray namedDest = getNamedDestination(reader, name, true); Bookmark namedTarget = new Bookmark(); namedTarget.setTitle(name); makeBookmarkParam(reader, namedTarget, (PdfArray) namedDest, pages); bookmark.setNamedTarget(namedTarget); } else if (dest.isArray()) { makeBookmarkParam(reader, bookmark, (PdfArray) dest, pages); } }
From source file:org.apache.ofbiz.content.survey.PdfSurveyServices.java
License:Apache License
/** * *//*w ww. j a v a 2 s . c o m*/ public static Map<String, Object> buildSurveyFromPdf(DispatchContext dctx, Map<String, ? extends Object> context) { Delegator delegator = dctx.getDelegator(); LocalDispatcher dispatcher = dctx.getDispatcher(); GenericValue userLogin = (GenericValue) context.get("userLogin"); Locale locale = (Locale) context.get("locale"); Timestamp nowTimestamp = UtilDateTime.nowTimestamp(); String surveyId = null; try { String surveyName = (String) context.get("surveyName"); ByteArrayOutputStream os = new ByteArrayOutputStream(); ByteBuffer byteBuffer = getInputByteBuffer(context, delegator); PdfReader pdfReader = new PdfReader(byteBuffer.array()); PdfStamper pdfStamper = new PdfStamper(pdfReader, os); AcroFields acroFields = pdfStamper.getAcroFields(); Map<String, Object> acroFieldMap = UtilGenerics.checkMap(acroFields.getFields()); String contentId = (String) context.get("contentId"); GenericValue survey = null; surveyId = (String) context.get("surveyId"); if (UtilValidate.isEmpty(surveyId)) { survey = delegator.makeValue("Survey", UtilMisc.toMap("surveyName", surveyName)); survey.set("surveyId", surveyId); survey.set("allowMultiple", "Y"); survey.set("allowUpdate", "Y"); survey = delegator.createSetNextSeqId(survey); surveyId = survey.getString("surveyId"); } // create a SurveyQuestionCategory to put the questions in Map<String, Object> createCategoryResultMap = dispatcher.runSync("createSurveyQuestionCategory", UtilMisc.<String, Object>toMap("description", "From AcroForm in Content [" + contentId + "] for Survey [" + surveyId + "]", "userLogin", userLogin)); String surveyQuestionCategoryId = (String) createCategoryResultMap.get("surveyQuestionCategoryId"); pdfStamper.setFormFlattening(true); for (String fieldName : acroFieldMap.keySet()) { AcroFields.Item item = acroFields.getFieldItem(fieldName); int type = acroFields.getFieldType(fieldName); String value = acroFields.getField(fieldName); Debug.logInfo("fieldName:" + fieldName + "; item: 
" + item + "; value: " + value, module); GenericValue surveyQuestion = delegator.makeValue("SurveyQuestion", UtilMisc.toMap("question", fieldName)); String surveyQuestionId = delegator.getNextSeqId("SurveyQuestion"); surveyQuestion.set("surveyQuestionId", surveyQuestionId); surveyQuestion.set("surveyQuestionCategoryId", surveyQuestionCategoryId); if (type == AcroFields.FIELD_TYPE_TEXT) { surveyQuestion.set("surveyQuestionTypeId", "TEXT_SHORT"); } else if (type == AcroFields.FIELD_TYPE_RADIOBUTTON) { surveyQuestion.set("surveyQuestionTypeId", "OPTION"); } else if (type == AcroFields.FIELD_TYPE_LIST || type == AcroFields.FIELD_TYPE_COMBO) { surveyQuestion.set("surveyQuestionTypeId", "OPTION"); // TODO: handle these specially with the acroFields.getListOptionDisplay (and getListOptionExport?) } else { surveyQuestion.set("surveyQuestionTypeId", "TEXT_SHORT"); Debug.logWarning("Building Survey from PDF, fieldName=[" + fieldName + "]: don't know how to handle field type: " + type + "; defaulting to short text", module); } // ==== create a good sequenceNum based on tab order or if no tab order then the page location Integer tabPage = item.getPage(0); Integer tabOrder = item.getTabOrder(0); Debug.logInfo("tabPage=" + tabPage + ", tabOrder=" + tabOrder, module); //array of float multiple of 5. 
For each of this groups the values are: [page, llx, lly, urx, ury] float[] fieldPositions = acroFields.getFieldPositions(fieldName); float fieldPage = fieldPositions[0]; float fieldLlx = fieldPositions[1]; float fieldLly = fieldPositions[2]; float fieldUrx = fieldPositions[3]; float fieldUry = fieldPositions[4]; Debug.logInfo("fieldPage=" + fieldPage + ", fieldLlx=" + fieldLlx + ", fieldLly=" + fieldLly + ", fieldUrx=" + fieldUrx + ", fieldUry=" + fieldUry, module); Long sequenceNum = null; if (tabPage != null && tabOrder != null) { sequenceNum = Long.valueOf(tabPage.intValue() * 1000 + tabOrder.intValue()); Debug.logInfo("tabPage=" + tabPage + ", tabOrder=" + tabOrder + ", sequenceNum=" + sequenceNum, module); } else if (fieldPositions.length > 0) { sequenceNum = Long.valueOf((long) fieldPage * 10000 + (long) fieldLly * 1000 + (long) fieldLlx); Debug.logInfo("fieldPage=" + fieldPage + ", fieldLlx=" + fieldLlx + ", fieldLly=" + fieldLly + ", fieldUrx=" + fieldUrx + ", fieldUry=" + fieldUry + ", sequenceNum=" + sequenceNum, module); } // TODO: need to find something better to put into these fields... 
String annotation = null; for (int k = 0; k < item.size(); ++k) { PdfDictionary dict = item.getWidget(k); // if the "/Type" value is "/Annot", then get the value of "/TU" for the annotation PdfObject typeValue = null; PdfObject tuValue = null; Set<PdfName> dictKeys = UtilGenerics.checkSet(dict.getKeys()); for (PdfName dictKeyName : dictKeys) { PdfObject dictObject = dict.get(dictKeyName); if ("/Type".equals(dictKeyName.toString())) { typeValue = dictObject; } else if ("/TU".equals(dictKeyName.toString())) { tuValue = dictObject; } } if (tuValue != null && typeValue != null && "/Annot".equals(typeValue.toString())) { annotation = tuValue.toString(); } } surveyQuestion.set("description", fieldName); if (UtilValidate.isNotEmpty(annotation)) { surveyQuestion.set("question", annotation); } else { surveyQuestion.set("question", fieldName); } GenericValue surveyQuestionAppl = delegator.makeValue("SurveyQuestionAppl", UtilMisc.toMap("surveyId", surveyId, "surveyQuestionId", surveyQuestionId)); surveyQuestionAppl.set("fromDate", nowTimestamp); surveyQuestionAppl.set("externalFieldRef", fieldName); if (sequenceNum != null) { surveyQuestionAppl.set("sequenceNum", sequenceNum); } surveyQuestion.create(); surveyQuestionAppl.create(); } pdfStamper.close(); if (UtilValidate.isNotEmpty(contentId)) { survey = EntityQuery.use(delegator).from("Survey").where("surveyId", surveyId).queryOne(); survey.set("acroFormContentId", contentId); survey.store(); } } catch (GenericEntityException e) { Debug.logError(e, "Error generating PDF: " + e.toString(), module); return ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.toString()), locale)); } catch (GeneralException e) { Debug.logError(e, "Error generating PDF: " + e.getMessage(), module); return ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.getMessage()), locale)); } catch (Exception e) { 
Debug.logError(e, "Error generating PDF: " + e.toString(), module); return ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.toString()), locale)); } Map<String, Object> results = ServiceUtil.returnSuccess(); results.put("surveyId", surveyId); return results; }
From source file:org.archive.modules.extractor.PDFParser.java
License:Apache License
/** * Parse a PdfDictionary, looking for URIs recursively and adding * them to foundURIs// ww w . j a va 2 s . c o m * @param entity */ @SuppressWarnings("unchecked") protected void extractURIs(PdfObject entity) { // deal with dictionaries if (entity.isDictionary()) { PdfDictionary dictionary = (PdfDictionary) entity; Set<PdfName> allkeys = dictionary.getKeys(); for (PdfName key : allkeys) { PdfObject value = dictionary.get(key); // see if it's the key is a UR[I,L] if (key.toString().equals("/URI") || key.toString().equals("/URL")) { foundURIs.add(value.toString()); } else { this.extractURIs(value); } } // deal with arrays } else if (entity.isArray()) { PdfArray array = (PdfArray) entity; for (PdfObject pdfObject : (Iterable<PdfObject>) array.getArrayList()) { this.extractURIs(pdfObject); } // deal with indirect references } else if (entity.getClass() == PRIndirectReference.class) { PRIndirectReference indirect = (PRIndirectReference) entity; // if we've already seen a reference to this object if (haveSeen(indirect.getGeneration(), indirect.getNumber())) { return; // note that we've seen it if it's new } else { markAsSeen(indirect.getGeneration(), indirect.getNumber()); } // dereference the "pointer" and process the object indirect.getReader(); // FIXME: examine side-effects PdfObject direct = PdfReader.getPdfObject(indirect); this.extractURIs(direct); } }
From source file:org.ofbiz.content.survey.PdfSurveyServices.java
License:Apache License
/** * *//*from w w w. ja v a 2s .c o m*/ public static Map<String, Object> buildSurveyFromPdf(DispatchContext dctx, Map<String, ? extends Object> context) { Delegator delegator = dctx.getDelegator(); LocalDispatcher dispatcher = dctx.getDispatcher(); GenericValue userLogin = (GenericValue) context.get("userLogin"); Locale locale = (Locale) context.get("locale"); Timestamp nowTimestamp = UtilDateTime.nowTimestamp(); String surveyId = null; try { String surveyName = (String) context.get("surveyName"); ByteArrayOutputStream os = new ByteArrayOutputStream(); ByteBuffer byteBuffer = getInputByteBuffer(context, delegator); PdfReader pdfReader = new PdfReader(byteBuffer.array()); PdfStamper pdfStamper = new PdfStamper(pdfReader, os); AcroFields acroFields = pdfStamper.getAcroFields(); Map<String, Object> acroFieldMap = UtilGenerics.checkMap(acroFields.getFields()); String contentId = (String) context.get("contentId"); GenericValue survey = null; surveyId = (String) context.get("surveyId"); if (UtilValidate.isEmpty(surveyId)) { survey = delegator.makeValue("Survey", UtilMisc.toMap("surveyName", surveyName)); survey.set("surveyId", surveyId); survey.set("allowMultiple", "Y"); survey.set("allowUpdate", "Y"); survey = delegator.createSetNextSeqId(survey); surveyId = survey.getString("surveyId"); } // create a SurveyQuestionCategory to put the questions in Map<String, Object> createCategoryResultMap = dispatcher.runSync("createSurveyQuestionCategory", UtilMisc.<String, Object>toMap("description", "From AcroForm in Content [" + contentId + "] for Survey [" + surveyId + "]", "userLogin", userLogin)); String surveyQuestionCategoryId = (String) createCategoryResultMap.get("surveyQuestionCategoryId"); pdfStamper.setFormFlattening(true); for (String fieldName : acroFieldMap.keySet()) { AcroFields.Item item = acroFields.getFieldItem(fieldName); int type = acroFields.getFieldType(fieldName); String value = acroFields.getField(fieldName); Debug.logInfo("fieldName:" + fieldName + "; 
item: " + item + "; value: " + value, module); GenericValue surveyQuestion = delegator.makeValue("SurveyQuestion", UtilMisc.toMap("question", fieldName)); String surveyQuestionId = delegator.getNextSeqId("SurveyQuestion"); surveyQuestion.set("surveyQuestionId", surveyQuestionId); surveyQuestion.set("surveyQuestionCategoryId", surveyQuestionCategoryId); if (type == AcroFields.FIELD_TYPE_TEXT) { surveyQuestion.set("surveyQuestionTypeId", "TEXT_SHORT"); } else if (type == AcroFields.FIELD_TYPE_RADIOBUTTON) { surveyQuestion.set("surveyQuestionTypeId", "OPTION"); } else if (type == AcroFields.FIELD_TYPE_LIST || type == AcroFields.FIELD_TYPE_COMBO) { surveyQuestion.set("surveyQuestionTypeId", "OPTION"); // TODO: handle these specially with the acroFields.getListOptionDisplay (and getListOptionExport?) /*String[] listOptionDisplayArray = acroFields.getListOptionDisplay(fieldName); String[] listOptionExportArray = acroFields.getListOptionExport(fieldName); Debug.logInfo("listOptionDisplayArray: " + listOptionDisplayArray + "; listOptionExportArray: " + listOptionExportArray, module);*/ } else { surveyQuestion.set("surveyQuestionTypeId", "TEXT_SHORT"); Debug.logWarning("Building Survey from PDF, fieldName=[" + fieldName + "]: don't know how to handle field type: " + type + "; defaulting to short text", module); } // ==== create a good sequenceNum based on tab order or if no tab order then the page location Integer tabPage = item.getPage(0); Integer tabOrder = item.getTabOrder(0); Debug.logInfo("tabPage=" + tabPage + ", tabOrder=" + tabOrder, module); //array of float multiple of 5. 
For each of this groups the values are: [page, llx, lly, urx, ury] float[] fieldPositions = acroFields.getFieldPositions(fieldName); float fieldPage = fieldPositions[0]; float fieldLlx = fieldPositions[1]; float fieldLly = fieldPositions[2]; float fieldUrx = fieldPositions[3]; float fieldUry = fieldPositions[4]; Debug.logInfo("fieldPage=" + fieldPage + ", fieldLlx=" + fieldLlx + ", fieldLly=" + fieldLly + ", fieldUrx=" + fieldUrx + ", fieldUry=" + fieldUry, module); Long sequenceNum = null; if (tabPage != null && tabOrder != null) { sequenceNum = Long.valueOf(tabPage.intValue() * 1000 + tabOrder.intValue()); Debug.logInfo("tabPage=" + tabPage + ", tabOrder=" + tabOrder + ", sequenceNum=" + sequenceNum, module); } else if (fieldPositions.length > 0) { sequenceNum = Long.valueOf((long) fieldPage * 10000 + (long) fieldLly * 1000 + (long) fieldLlx); Debug.logInfo("fieldPage=" + fieldPage + ", fieldLlx=" + fieldLlx + ", fieldLly=" + fieldLly + ", fieldUrx=" + fieldUrx + ", fieldUry=" + fieldUry + ", sequenceNum=" + sequenceNum, module); } // TODO: need to find something better to put into these fields... String annotation = null; for (int k = 0; k < item.size(); ++k) { PdfDictionary dict = item.getWidget(k); // if the "/Type" value is "/Annot", then get the value of "/TU" for the annotation /* Interesting... this doesn't work, I guess we have to iterate to find the stuff... 
PdfObject typeValue = dict.get(new PdfName("/Type")); if (typeValue != null && "/Annot".equals(typeValue.toString())) { PdfObject tuValue = dict.get(new PdfName("/TU")); annotation = tuValue.toString(); } */ PdfObject typeValue = null; PdfObject tuValue = null; Set<PdfName> dictKeys = UtilGenerics.checkSet(dict.getKeys()); for (PdfName dictKeyName : dictKeys) { PdfObject dictObject = dict.get(dictKeyName); if ("/Type".equals(dictKeyName.toString())) { typeValue = dictObject; } else if ("/TU".equals(dictKeyName.toString())) { tuValue = dictObject; } //Debug.logInfo("AcroForm widget fieldName[" + fieldName + "] dictKey[" + dictKeyName.toString() + "] dictValue[" + dictObject.toString() + "]", module); } if (tuValue != null && typeValue != null && "/Annot".equals(typeValue.toString())) { annotation = tuValue.toString(); } } surveyQuestion.set("description", fieldName); if (UtilValidate.isNotEmpty(annotation)) { surveyQuestion.set("question", annotation); } else { surveyQuestion.set("question", fieldName); } GenericValue surveyQuestionAppl = delegator.makeValue("SurveyQuestionAppl", UtilMisc.toMap("surveyId", surveyId, "surveyQuestionId", surveyQuestionId)); surveyQuestionAppl.set("fromDate", nowTimestamp); surveyQuestionAppl.set("externalFieldRef", fieldName); if (sequenceNum != null) { surveyQuestionAppl.set("sequenceNum", sequenceNum); } surveyQuestion.create(); surveyQuestionAppl.create(); } pdfStamper.close(); if (UtilValidate.isNotEmpty(contentId)) { survey = EntityQuery.use(delegator).from("Survey").where("surveyId", surveyId).queryOne(); survey.set("acroFormContentId", contentId); survey.store(); } } catch (GenericEntityException e) { Debug.logError(e, "Error generating PDF: " + e.toString(), module); return ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.toString()), locale)); } catch (GeneralException e) { Debug.logError(e, "Error generating PDF: " + e.getMessage(), module); return 
ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.getMessage()), locale)); } catch (Exception e) { Debug.logError(e, "Error generating PDF: " + e.toString(), module); return ServiceUtil.returnError(UtilProperties.getMessage(resource, "ContentPDFGeneratingError", UtilMisc.toMap("errorString", e.toString()), locale)); } Map<String, Object> results = ServiceUtil.returnSuccess(); results.put("surveyId", surveyId); return results; }