List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.getPages()
public PDPageTree getPages()
From source file:dev.ztgnrw.ExtractEmbeddedFiles.java
License:Apache License
/** * This is the main method./*from w w w . j a v a2 s.c om*/ * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. */ public static void extractEmbeddedFiles(String file) throws IOException { PDDocument document = null; try { File pdfFile = new File(file); String filePath = pdfFile.getParent() + System.getProperty("file.separator"); document = PDDocument.load(pdfFile); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree != null) { Map<String, PDComplexFileSpecification> names = efTree.getNames(); if (names != null) { extractFiles(names, filePath); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { names = node.getNames(); extractFiles(names, filePath); } } } // extract files from annotations for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment .getFile(); PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); extractFile(filePath, fileSpec.getFilename(), embeddedFile); } } } } finally { if (document != null) { document.close(); } } }
From source file:eu.europa.esig.dss.pades.signature.PAdESVisibleSignaturePositionTest.java
License:Open Source License
/**
 * Asserts that the first page of the PDF read from the given stream has the expected
 * rotation.
 *
 * @param inputStream stream supplying the PDF document; it is not closed here.
 * @param rotate expected rotation value of the first page.
 *
 * @throws IOException if the document cannot be loaded or closed.
 */
private void checkRotation(InputStream inputStream, int rotate) throws IOException {
    // Release the document even when the assertion fails.
    try (PDDocument document = PDDocument.load(inputStream)) {
        Assert.assertEquals(rotate, document.getPages().get(0).getRotation());
    }
}
From source file:eu.europa.esig.dss.pades.signature.PAdESVisibleSignaturePositionTest.java
License:Open Source License
private void checkImageSimilarityPdf(String samplePdf, String checkPdf, float similarity) throws IOException { DSSDocument document = sign(signablePdfs.get(samplePdf)); PDDocument sampleDocument = PDDocument.load(document.openStream()); PDDocument checkDocument = PDDocument .load(getClass().getResourceAsStream("/visualSignature/check/" + checkPdf)); PDPageTree samplePageTree = sampleDocument.getPages(); PDPageTree checkPageTree = checkDocument.getPages(); Assert.assertEquals(checkPageTree.getCount(), samplePageTree.getCount()); PDFRenderer sampleRenderer = new PDFRenderer(sampleDocument); PDFRenderer checkRenderer = new PDFRenderer(checkDocument); for (int pageNumber = 0; pageNumber < checkPageTree.getCount(); pageNumber++) { BufferedImage sampleImage = sampleRenderer.renderImageWithDPI(pageNumber, DPI); BufferedImage checkImage = checkRenderer.renderImageWithDPI(pageNumber, DPI); float checkSimilarity = checkImageSimilarity(sampleImage, checkImage, CHECK_RESOLUTION); float calculatedSimilarity = ((int) (similarity * 100f)) / 100f; // calulate rotated position has about 1 // pixel position difference Assert.assertTrue(checkSimilarity >= calculatedSimilarity); }//from w ww. j a v a 2 s . co m }
From source file:helper.pdfpreprocessing.pdf.TextHighlight.java
License:Apache License
/**
 * Resets this engine, binds it to the given document and processes all of its pages.
 *
 * <p>Any {@link Exception} or {@link Error} raised along the way is caught and only its
 * stack trace is printed, so nothing propagates to the caller despite the declared
 * {@code throws} clause.
 *
 * @param pdf the document whose pages are processed.
 *
 * @throws IOException declared for callers; not thrown by the code visible here.
 */
public void initialize(final PDDocument pdf) throws IOException {
    try {
        resetEngine();
        document = pdf;
        textCache = new TextCache();

        // With extra formatting enabled, every structural boundary is rendered as a
        // line separator.
        if (getAddMoreFormatting()) {
            setParagraphEnd(getLineSeparator());
            setPageStart(getLineSeparator());
            setArticleStart(getLineSeparator());
            setArticleEnd(getLineSeparator());
        }

        startDocument(pdf);
        processPages(pdf.getPages());
        endDocument(pdf);
    } catch (Exception | Error e) {
        // NOTE(review): failures, including Errors, are reported but not rethrown;
        // confirm callers really depend on this before tightening it.
        e.printStackTrace();
    }
}
From source file:model.util.pdf.PDFUtils.java
License:Apache License
private static void changeCropBox(PDDocument document, float a, float b, float c, float d) { for (PDPage page : document.getPages()) { PDRectangle rectangle = new PDRectangle(); rectangle.setLowerLeftX(a);//from ww w .j a va 2 s .c o m rectangle.setLowerLeftY(b); rectangle.setUpperRightX(c); rectangle.setUpperRightY(d); page.setCropBox(rectangle); } }
From source file:net.bookinaction.ExtractAnnotations.java
License:Apache License
public void doJob(String job, Float[] pA) throws IOException { PDDocument document = null; Stamper s = new Stamper(); // utility class final String job_file = job + ".pdf"; final String dic_file = job + "-dict.txt"; final String new_job = job + "-new.pdf"; PrintWriter writer = new PrintWriter(dic_file); ImageLocationListener imageLocationsListener = new ImageLocationListener(); AnnotationMaker annotMaker = new AnnotationMaker(); try {//from ww w.j a v a2 s . c o m document = PDDocument.load(new File(job_file)); int pageNum = 0; for (PDPage page : document.getPages()) { pageNum++; PDRectangle cropBox = page.getCropBox(); List<PDAnnotation> annotations = page.getAnnotations(); // extract image locations List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>(); imageLocationsListener.setImageRects(imageRects); imageLocationsListener.processPage(page); int im = 0; for (Rectangle2D pdImageRect : imageRects) { s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect); annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect, "[im" + im + "]")); im++; } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); int j = 0; List<PDAnnotation> viableAnnots = new ArrayList(); for (PDAnnotation annot : annotations) { if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) { stripper.addRegion(Integer.toString(j++), s.getAwtRect( s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox)); viableAnnots.add(annot); } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) { viableAnnots.add(annot); } } stripper.extractRegions(page); List<PDRectangle> rects = new ArrayList<PDRectangle>(); List<String> comments = new ArrayList<String>(); List<String> highlightTexts = new ArrayList<String>(); j = 0; for (PDAnnotation viableAnnot : viableAnnots) { if (viableAnnot instanceof PDAnnotationTextMarkup) { String highlightText = 
stripper.getTextForRegion(Integer.toString(j++)); String withoutCR = highlightText.replace((char) 0x0A, '^'); String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]); rects.add(aRect); comments.add(comment); highlightTexts.add(highlightText); s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString); } else if (viableAnnot instanceof PDAnnotationText) { String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); for (Rectangle2D pdImageRect : imageRects) { if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(), viableAnnot.getRectangle().getLowerLeftY())) { s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect, colorString); annotations.add(annotMaker.squareAnnotation(Color.GREEN, (Rectangle2D.Float) pdImageRect, comment)); } ; } } } PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true); int i = 0; for (PDRectangle pdRect : rects) { String comment = comments.get(i); String highlightText = highlightTexts.get(i); //annotations.add(linkAnnotation(pdRect, comment, highlightText)); //annotations.add(annotationSquareCircle(pdRect, BLUE)); s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(), pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE); i++; } canvas.close(); } writer.close(); document.save(new_job); } finally { if (document != null) { document.close(); } } }
From source file:org.example.extractimagesfrompdfpages.ExtractImagesFromPDFPagesMain.java
/**
 * Writes every image XObject found in the page resources of a PDF to
 * {@code <pdf directory>/temp_images/<page number>/<image number>.png} and prints each
 * written path.
 *
 * @param args {@code args[0]} is the path of the PDF file to read.
 *
 * @throws IllegalArgumentException if no path is supplied.
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the PDF
 *             or writing an image.
 */
public static void main(String[] args) {
    if (args.length < 1) {
        throw new IllegalArgumentException("Expected the path of a PDF file as the first argument");
    }

    File thePDFFile = new File(args[0]);
    // Resolve against the working directory first: getParentFile() is null for a bare
    // file name such as "doc.pdf".
    File thePDFFileDirectory = thePDFFile.getAbsoluteFile().getParentFile();

    // try-with-resources closes the document on every exit path.
    try (PDDocument document = PDDocument.load(thePDFFile)) {
        PDPageTree list = document.getPages();

        int i = 1;
        for (PDPage page : list) {
            boolean alreadyCreatedFolderForThisPage = false;
            File thePDFPageFolder = new File(thePDFFileDirectory.getAbsolutePath() + "/temp_images" + "/" + i);

            PDResources pdResources = page.getResources();
            // A page without a resource dictionary has no images to extract.
            if (pdResources != null) {
                int j = 1;
                for (COSName c : pdResources.getXObjectNames()) {
                    PDXObject o = pdResources.getXObject(c);
                    if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
                        // Create the page folder lazily, only once an image is found.
                        if (!alreadyCreatedFolderForThisPage) {
                            thePDFPageFolder.mkdirs();
                            alreadyCreatedFolderForThisPage = true;
                        }
                        String imagePath = thePDFPageFolder.getAbsolutePath() + "/" + j + ".png";
                        ImageIO.write(((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) o).getImage(),
                                "png", new File(imagePath));
                        System.out.println(imagePath);
                        j++;
                    }
                }
            }
            i++;
        }
    } catch (IOException ex) {
        Logger.getLogger(ExtractImagesFromPDFPagesMain.class.getName()).log(Level.SEVERE, null, ex);
        throw new RuntimeException(ex);
    }
}
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Save all Attached (embedded) files to some directory. * /*from www.ja v a 2 s . c om*/ * @param pdfFile * Source PDF file. * @param outputDir * Target directory. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java?view=markup */ public static void saveAttachments(final File pdfFile, final File outputDir) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Extract Embedded (attached) files. final PDDocumentNameDictionary documentNameDictionary = new PDDocumentNameDictionary( document.getDocumentCatalog()); final PDEmbeddedFilesNameTreeNode embeddedFilesNameTree = documentNameDictionary.getEmbeddedFiles(); if (embeddedFilesNameTree != null) { extractFiles(outputDir, embeddedFilesNameTree.getNames()); final List<PDNameTreeNode<PDComplexFileSpecification>> kids = embeddedFilesNameTree.getKids(); if (kids != null) { for (PDNameTreeNode<PDComplexFileSpecification> nameTreeNode : kids) { extractFiles(outputDir, nameTreeNode.getNames()); } } } // Extract Embedded (attached) from annotations. for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { final PDAnnotationFileAttachment fileAttach = (PDAnnotationFileAttachment) annotation; final PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fileAttach .getFile(); extractFile(outputDir, fileSpec); } } } } finally { if (document != null) { document.close(); } } }
From source file:org.pdfsam.pdfbox.PDFBoxOutlineLevelsHandler.java
License:Open Source License
/**
 * Creates a handler bound to the given document.
 *
 * <p>The page tree is captured immediately. Named destinations are stored only when the
 * document catalog has a names dictionary, and a title pattern is compiled only when a
 * non-blank regular expression is supplied.
 *
 * @param document the document to read from; must not be null.
 * @param matchingTitleRegEx regular expression for titles; blank or null means no
 *            pattern is set.
 */
public PDFBoxOutlineLevelsHandler(PDDocument document, String matchingTitleRegEx) {
    requireNonNull(document, "Unable to retrieve bookmarks from a null document.");
    this.document = document;
    this.pages = document.getPages();

    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary != null) {
        this.namedDestinations = nameDictionary.getDests();
    }

    if (isNotBlank(matchingTitleRegEx)) {
        this.titleMatchingPattern = Pattern.compile(matchingTitleRegEx);
    }
}
From source file:src.controller.DocumentController.java
/** * Convertion en image/* ww w. j av a2s . c om*/ * * @param file * @param extension */ public void convertToImage(File file, String extension) { PDDocument document; try { document = PDDocument.load(file); PDFRenderer pdfRenderer = new PDFRenderer(document); int pageCounter = 0; for (PDPage lapage : document.getPages()) { // note that the page number parameter is zero based BufferedImage bim = pdfRenderer.renderImageWithDPI(pageCounter, 300, ImageType.RGB); // suffix in filename will be used as the file format ImageIOUtil.writeImage(bim, file + "-" + (pageCounter++) + extension, 300); } document.close(); } catch (IOException ex) { Logger.getLogger(DocumentController.class.getName()).log(Level.SEVERE, null, ex); } }