List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog
public PDDocumentCatalog getDocumentCatalog()
From source file:cx.fbn.nevernote.gui.PDFPreview.java
License:Open Source License
public boolean setupPreview(String filePath, String appl, int pageNumber) { // Fix stupid Windows file separation characters String whichOS = System.getProperty("os.name"); if (whichOS.contains("Windows")) { filePath = filePath.replace("\\", "/"); }/*from ww w . j av a 2s .c o m*/ if (appl.equals("pdf")) { PDDocument document = null; try { document = PDDocument.load(filePath); if (document.getNumberOfPages() <= pageNumber) return false; if (document.getDocumentCatalog().getAllPages().size() <= pageNumber) return false; PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(pageNumber); BufferedImage bi = page.convertToImage(); File outputfile; outputfile = new File(filePath + ".png"); ImageIO.write(bi, "png", outputfile); return true; } catch (IOException e1) { return false; } } return false; }
From source file:cz.fi.muni.xkremser.editor.server.fedora.KrameriusImageSupport.java
License:Open Source License
/** * Read image./*from w w w .j a v a2s .c om*/ * * @param url * the url * @param type * the type * @param page * the page * @return the image * @throws IOException * Signals that an I/O exception has occurred. */ public static Image readImage(URL url, ImageMimeType type, int page) throws IOException { if (type.javaNativeSupport()) { return ImageIO.read(url.openStream()); } else if ((type.equals(ImageMimeType.DJVU)) || (type.equals(ImageMimeType.VNDDJVU)) || (type.equals(ImageMimeType.XDJVU))) { com.lizardtech.djvu.Document doc = new com.lizardtech.djvu.Document(url); doc.setAsync(false); DjVuPage[] p = new DjVuPage[1]; // read page from the document - index 0, priority 1, favorFast true int size = doc.size(); if ((page != 0) && (page >= size)) { page = 0; } p[0] = doc.getPage(page, 1, true); p[0].setAsync(false); DjVuImage djvuImage = new DjVuImage(p, true); Rectangle pageBounds = djvuImage.getPageBounds(0); Image[] images = djvuImage.getImage(new JPanel(), new Rectangle(pageBounds.width, pageBounds.height)); if (images.length == 1) { Image img = images[0]; return img; } else return null; } else if (type.equals(ImageMimeType.PDF)) { PDDocument document = null; try { document = PDDocument.load(url.openStream()); int resolution = 96; List<?> pages = document.getDocumentCatalog().getAllPages(); PDPage pdPage = (PDPage) pages.get(page); BufferedImage image = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, resolution); return image; } finally { if (document != null) { document.close(); } } } else throw new IllegalArgumentException("unsupported mimetype '" + type.getValue() + "'"); }
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param prefix /*from ww w . j a va 2 s . co m*/ * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet // * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException("pdfFile must be defined"); } InputStream inputStream = null; if (password != null) { try { log.debug("PDF probably encrypted, trying to decrypt using given password {}", password); ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.entrySet().iterator(); while (xobjIter.hasNext()) { Map.Entry entry = (Map.Entry) xobjIter.next(); String key = (String) entry.getKey(); PDXObject xobj = (PDXObject) entry.getValue(); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.entrySet().iterator(); while (imageIter.hasNext()) { Map.Entry imEntry = (Map.Entry) imageIter.next(); String imKey = (String) imEntry.getKey(); PDXObjectImage image = (PDXObjectImage) imEntry.getValue(); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1 && !binarize) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => skipping"); continue; } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (RuntimeException ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet// www.j ava 2 s.c o m * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess, Boolean silent, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException(pdfFile); } String prefix = null; InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(pdfFile, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.keySet().iterator(); while (xobjIter.hasNext()) { String key = (String) xobjIter.next(); PDXObject xobj = (PDXObject) xobjs.get(key); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String imKey = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(imKey); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.info("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:de.berber.kindle.annotator.lib.PDFAnnotator.java
License:Apache License
@SuppressWarnings("unchecked") public boolean run() { // read all annotations final List<Annotation> annotations = new KindleAnnotationReader(cc, pdfFile).read(); if (annotations.size() == 0) { return true; }/*from www . jav a 2 s.com*/ PDDocument document = null; // annotate pdf try { document = PDDocument.load(pdfFile); //inDocument.decrypt(pass); // get outline for bookmarks PDDocumentOutline documentOutline = document.getDocumentCatalog().getDocumentOutline(); if (documentOutline == null) { // if there is no document outline we have to create a new one. documentOutline = new PDDocumentOutline(); document.getDocumentCatalog().setDocumentOutline(documentOutline); } assert documentOutline != null; // convert annotations for each page int pageNumber = 0; for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) { for (final Annotation dxAnn : annotations) { dxAnn.toPDAnnotation(pageNumber, documentOutline, page); } pageNumber++; } //inDocument.setAllSecurityToBeRemoved(true); document.save(outFile.toString()); } catch (FileNotFoundException e) { LOG.error("Could not find input file " + pdfFile); return false; } catch (IOException e) { LOG.error("IOError while writing result file " + outFile); return false; } catch (COSVisitorException e) { LOG.error("PDFBox error while storing result file " + outFile); return false; } finally { if (document != null) { try { document.close(); } catch (IOException e) { LOG.error("Error while closing PDF document " + pdfFile); } } } return true; }
From source file:de.mirkosertic.desktopsearch.pdfpreview.PDFPreviewGenerator.java
License:Open Source License
@Override public synchronized Preview createPreviewFor(File aFile) { PDDocument theDocument = null; try {//from w w w.jav a2s .co m theDocument = PDDocument.load(aFile); List<?> thePages = theDocument.getDocumentCatalog().getAllPages(); if (thePages.isEmpty()) { return null; } PDPage theFirstPage = (PDPage) thePages.get(0); PDRectangle mBox = theFirstPage.findMediaBox(); float theWidthPt = mBox.getWidth(); float theHeightPt = mBox.getHeight(); int theWidthPx = THUMB_WIDTH; // Math.round(widthPt * scaling); int theHeightPx = THUMB_HEIGHT; // Math.round(heightPt * scaling); float theScaling = THUMB_WIDTH / theWidthPt; // resolution / 72.0F; Dimension thePageDimension = new Dimension((int) theWidthPt, (int) theHeightPt); BufferedImage theImage = new BufferedImage(theWidthPx, theHeightPx, BufferedImage.TYPE_INT_RGB); Graphics2D theGraphics = (Graphics2D) theImage.getGraphics(); theGraphics.setBackground(new Color(255, 255, 255, 0)); theGraphics.clearRect(0, 0, theImage.getWidth(), theImage.getHeight()); theGraphics.scale(theScaling, theScaling); PageDrawer theDrawer = new PageDrawer(); theDrawer.drawPage(theGraphics, theFirstPage, thePageDimension); int rotation = theFirstPage.findRotation(); if ((rotation == 90) || (rotation == 270)) { int w = theImage.getWidth(); int h = theImage.getHeight(); BufferedImage rotatedImg = new BufferedImage(w, h, theImage.getType()); Graphics2D g = rotatedImg.createGraphics(); g.rotate(Math.toRadians(rotation), w / 2, h / 2); g.drawImage(theImage, null, 0, 0); } theGraphics.dispose(); return new Preview(ImageUtils.rescale(theImage, THUMB_WIDTH, THUMB_HEIGHT, ImageUtils.RescaleMethod.RESIZE_FIT_ONE_DIMENSION)); } catch (Exception e) { LOGGER.error("Error creating preview for " + aFile, e); return null; } finally { try { // Always close the document theDocument.close(); } catch (Exception e) { } } }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFApachePDFBoxExtractor.java
License:Apache License
@SuppressWarnings("unchecked") public byte[] getPDFPages(int fromPageNumber, int toPageNumber) { ByteArrayOutputStream byteArrayOutputStream = null; boolean extractionSuccessful = false; if (pdfDocument != null) { int numberOfPages = getNumberOfPages(); /*//from w w w . ja v a 2s .co m * Check if the given page numbers are in the allowed range. */ if (fromPageNumber > 0 && fromPageNumber <= numberOfPages && toPageNumber > 0 && toPageNumber <= numberOfPages) { /* * Now check if the given fromPageNumber is smaller * as the given toPageNumber. If not swap the numbers. */ if (fromPageNumber > toPageNumber) { int tmpPageNumber = toPageNumber; toPageNumber = fromPageNumber; fromPageNumber = tmpPageNumber; } /* * Now extract the pages * * NOTE * ==== * Since Apache PDFBox v1.5.0 there exists the class * org.apache.pdfbox.util.PageExtractor */ /* boolean isApachePageExtractorAvailable = false; Class<?> pageExtractorClass = null; try { pageExtractorClass = getClass().getClassLoader().loadClass("org.apache.pdfbox.util.PageExtractor"); Constructor<?> pdfExtractConstructor = pageExtractorClass.getConstructor(PDDocument.class, int.class, int.class); Method pdfExtractMethod = pageExtractorClass.getMethod("extract"); isApachePageExtractorAvailable = true; } catch (ClassNotFoundException ex) { } catch (SecurityException ex) { } catch (NoSuchMethodException ex) { } */ try { PDDocument extractedDocumentPages = new PDDocument(); extractedDocumentPages.setDocumentInformation(this.pdfDocument.getDocumentInformation()); extractedDocumentPages.getDocumentCatalog() .setViewerPreferences(this.pdfDocument.getDocumentCatalog().getViewerPreferences()); List<PDPage> pages = (List<PDPage>) this.pdfDocument.getDocumentCatalog().getAllPages(); int pageCounter = 1; for (PDPage page : pages) { if (pageCounter >= fromPageNumber && pageCounter <= toPageNumber) { PDPage importedPdfPage; importedPdfPage = extractedDocumentPages.importPage(page); importedPdfPage.setCropBox(page.findCropBox()); importedPdfPage.setMediaBox(page.findMediaBox()); importedPdfPage.setResources(page.findResources()); importedPdfPage.setRotation(page.findRotation()); } pageCounter++; } // end for byteArrayOutputStream = new ByteArrayOutputStream(); extractedDocumentPages.save(byteArrayOutputStream); extractedDocumentPages.close(); extractionSuccessful = true; } catch (COSVisitorException ex) { // TODO: Create an own exception for PDF processing errors. logger.error("An exception occurred while extracting " + "pages from the input PDF file.", ex); } catch (IOException ex) { // TODO: Create an own exception for PDF processing errors. logger.error("An exception occurred while extracting " + "pages from the input PDF file.", ex); } finally { if (!extractionSuccessful) { byteArrayOutputStream = null; } } // end try..catch..finally } // end if checking range of given pages } // end if (pdfDocument != null) if (byteArrayOutputStream != null) { return byteArrayOutputStream.toByteArray(); } return null; }
From source file:de.prozesskraft.pkraft.Createdoc.java
/** * merge the pdfs/* w w w. ja va 2 s.co m*/ */ private static void mergePdf(Map<String, String> pdfRankFiles, String output) { System.out.println("merging pdfs to a single file"); Set<String> keySet = pdfRankFiles.keySet(); ArrayList<String> listKey = new ArrayList(keySet); Collections.sort(listKey); try { PDDocument document = new PDDocument(); // if(document.getNumberOfPages() > 0) // { // System.out.println("deleting empty page"); // document.removePage(0); // } for (String actualKey : listKey) { PDDocument part = PDDocument.load(pdfRankFiles.get(actualKey)); System.out.println("merging " + pdfRankFiles.get(actualKey)); ArrayList<PDPage> list = (ArrayList<PDPage>) part.getDocumentCatalog().getAllPages(); for (PDPage page : list) { document.addPage(page); } } try { System.out.println("writing " + output); document.save(output); } catch (COSVisitorException e) { // TODO Auto-generated catch block e.printStackTrace(); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
@Override public void generateThumbnail(final File input, final File output) throws IOException, ThumbnailerException { FileUtils.deleteQuietly(output);//from w w w.ja va 2s . com PDDocument document = null; try { try { document = PDDocument.load(input); } catch (final IOException e) { throw new ThumbnailerException("Could not load PDF File", e); } final List<?> pages = document.getDocumentCatalog().getAllPages(); final PDPage page = (PDPage) pages.get(0); final BufferedImage tmpImage = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB); if (tmpImage.getWidth() == this.thumbWidth) { ImageIO.write(tmpImage, PDFBoxThumbnailer.OUTPUT_FORMAT, output); } else { final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight); resizer.resizeMethod = ResizeImage.NO_RESIZE_ONLY_CROP; resizer.setInputImage(tmpImage); resizer.writeOutput(output); } } finally { if (document != null) { try { document.close(); } catch (final IOException e) { } } } }
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from ww w . j a v a 2 s . c o m*/ public void generateThumbnails(final File input, final File outputFolder) throws IOException, ThumbnailerException { PDDocument document = null; try { try { document = PDDocument.load(input); } catch (final IOException e) { throw new ThumbnailerException("Could not load PDF File", e); } final List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); int pageNumber = 0; for (final PDPage page : allPages) { final BufferedImage image = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB); final File outputFile = ThumbnailNamer.getFile(outputFolder, pageNumber); if (image.getWidth() == this.thumbWidth) { ImageIO.write(image, PDFBoxThumbnailer.OUTPUT_FORMAT, outputFile); } else { final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight); resizer.resizeMethod = ResizeImage.RESIZE_FIT_BOTH_DIMENSIONS; resizer.setInputImage(image); resizer.writeOutput(outputFile); } pageNumber++; } } finally { if (document != null) { try { document.close(); } catch (final IOException e) { // swallow exception on closing. } } } }