List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from ww w. j av a 2 s .com*/ public void generateThumbnails(final File input, final File outputFolder) throws IOException, ThumbnailerException { PDDocument document = null; try { try { document = PDDocument.load(input); } catch (final IOException e) { throw new ThumbnailerException("Could not load PDF File", e); } final List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); int pageNumber = 0; for (final PDPage page : allPages) { final BufferedImage image = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB); final File outputFile = ThumbnailNamer.getFile(outputFolder, pageNumber); if (image.getWidth() == this.thumbWidth) { ImageIO.write(image, PDFBoxThumbnailer.OUTPUT_FORMAT, outputFile); } else { final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight); resizer.resizeMethod = ResizeImage.RESIZE_FIT_BOTH_DIMENSIONS; resizer.setInputImage(image); resizer.writeOutput(outputFile); } pageNumber++; } } finally { if (document != null) { try { document.close(); } catch (final IOException e) { // swallow exception on closing. } } } }
From source file:de.uzk.hki.da.convert.PdfService.java
License:Open Source License
/** * Generates a new PDF which only contains certainPages of the original document. * Users can choose if they wish to reduce to a number of pages (beginning from the first page) * or to a certain set of pages. Both options can be used at the same time. * By setting one of the parameters to either "" or null any of the options the according * option will not be used./* w ww.j ava2 s . c om*/ * * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document). * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document. * * @throws IOException * @author Jens Peters * @author Sebastian Cuy * @author Daniel M. de Oliveira */ public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException { PDDocument srcPdf = null; PDDocument targetPdf = null; if (srcPdfFile == null) throw new IllegalStateException("srcFile not set"); srcPdf = PDDocument.load(srcPdfFile); targetPdf = new PDDocument(); @SuppressWarnings("rawtypes") List srcPages = srcPdf.getDocumentCatalog().getAllPages(); int numberOfPages = 0; if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) { numberOfPages = Integer.parseInt(numberOfPagesText); for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (StringUtilities.isNotSet(numberOfPagesText) && StringUtilities.isNotSet(certainPagesText)) { for (int i = 0; i < srcPages.size(); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (certainPagesText != null && !certainPagesText.isEmpty()) { String[] certainPagesTexts = certainPagesText.split(" "); int[] certainPages = new int[certainPagesTexts.length]; for (int i = 0; i < certainPagesTexts.length; i++) { certainPages[i] = Integer.parseInt(certainPagesTexts[i]); } Arrays.sort(certainPages); for (int i = 0; i < certainPages.length; i++) { if (certainPages[i] > numberOfPages && 
srcPages.size() > certainPages[i] - 1) targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1)); } } try { targetPdf.save(targetPdfFile); } catch (Exception e) { throw new RuntimeException("Unable to create PDF!", e); } finally { targetPdf.close(); srcPdf.close(); } }
From source file:de.uzk.hki.da.convert.PublishPDFConversionStrategyTests.java
License:Open Source License
/** * Test./*w w w. j a va 2 s . c o m*/ * * @throws IOException Signals that an I/O exception has occurred. */ @Test public void test() throws IOException { o = TESTHelper.setUpObject("1", new RelativePath(workAreaRootPath)); WorkArea wa = new WorkArea(n, o); PublicationRight right = new PublicationRight(); right.setAudience(Audience.PUBLIC); right.setTextRestriction(new TextRestriction()); right.getTextRestriction().setCertainPages(new int[] { 1, 2, 7, 10, 12, 14, 15 }); o.getRights().getPublicationRights().add(right); DAFile sourceFile = new DAFile("a", "filename.pdf"); ConversionInstruction ci = new ConversionInstruction(); ci.setSource_file(sourceFile); ci.setTarget_folder("target/"); ConversionRoutine cr = new ConversionRoutine(); cr.setTarget_suffix("pdf"); ci.setConversion_routine(cr); cs.setObject(o); List<Event> events = cs.convertFile(new WorkArea(n, o), ci); File targetFile = wa.toFile(events.get(0).getTarget_file()); assertTrue(targetFile.exists()); assertEquals("filename.pdf", targetFile.getName()); // contract states that the PDF should have 7 pages PDDocument targetDoc = PDDocument.load(targetFile); assertEquals(7, targetDoc.getDocumentCatalog().getAllPages().size()); targetFile = wa.toFile(events.get(1).getTarget_file()); assertTrue(targetFile.exists()); assertEquals("filename.pdf", targetFile.getName()); // contract has no restrictions for institution, PDF should have targetDoc = PDDocument.load(targetFile); assertEquals(227, targetDoc.getDocumentCatalog().getAllPages().size()); }
From source file:de.uzk.hki.da.format.PdfService.java
License:Open Source License
/** * Generates a new PDF which only contains certainPages of the original document. * Users can choose if they wish to reduce to a number of pages (beginning from the first page) * or to a certain set of pages. Both options can be used at the same time. * By setting one of the parameters to either "" or null any of the options the according * option will not be used./*from w w w . ja v a 2s. c om*/ * * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document). * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document. * * @throws IOException * @author Jens Peters * @author Sebastian Cuy * @author Daniel M. de Oliveira */ public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException { PDDocument srcPdf = null; PDDocument targetPdf = null; if (srcPdfFile == null) throw new IllegalStateException("srcFile not set"); srcPdf = PDDocument.load(srcPdfFile); targetPdf = new PDDocument(); @SuppressWarnings("rawtypes") List srcPages = srcPdf.getDocumentCatalog().getAllPages(); int numberOfPages = 0; if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) { numberOfPages = Integer.parseInt(numberOfPagesText); for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (certainPagesText != null && !certainPagesText.isEmpty()) { String[] certainPagesTexts = certainPagesText.split(" "); int[] certainPages = new int[certainPagesTexts.length]; for (int i = 0; i < certainPagesTexts.length; i++) { certainPages[i] = Integer.parseInt(certainPagesTexts[i]); } Arrays.sort(certainPages); for (int i = 0; i < certainPages.length; i++) { if (certainPages[i] > numberOfPages && srcPages.size() > certainPages[i] - 1) targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1)); } } try { targetPdf.save(targetPdfFile); } catch (Exception e) { throw new 
RuntimeException("Unable to create PDF!", e); } finally { targetPdf.close(); } }
From source file:de.uzk.hki.da.format.PublishPDFConversionStrategyTests.java
License:Open Source License
/** * Test.//w ww . jav a2s. c om * * @throws IOException Signals that an I/O exception has occurred. */ @Test public void test() throws IOException { o = TESTHelper.setUpObject("1", new RelativePath(workAreaRootPath)); PublicationRight right = new PublicationRight(); right.setAudience(Audience.PUBLIC); right.setTextRestriction(new TextRestriction()); right.getTextRestriction().setCertainPages(new int[] { 1, 2, 7, 10, 12, 14, 15 }); o.getRights().getPublicationRights().add(right); DAFile sourceFile = new DAFile(o.getLatestPackage(), "a", "filename.pdf"); ConversionInstruction ci = new ConversionInstruction(); ci.setSource_file(sourceFile); ci.setTarget_folder("target/"); ConversionRoutine cr = new ConversionRoutine(); cr.setTarget_suffix("pdf"); ci.setConversion_routine(cr); cs.setObject(o); List<Event> events = cs.convertFile(ci); File targetFile = events.get(0).getTarget_file().toRegularFile(); assertTrue(targetFile.exists()); assertEquals("filename.pdf", targetFile.getName()); // contract states that the PDF should have 7 pages PDDocument targetDoc = PDDocument.load(targetFile); assertEquals(7, targetDoc.getDocumentCatalog().getAllPages().size()); targetFile = events.get(1).getTarget_file().toRegularFile(); assertTrue(targetFile.exists()); assertEquals("filename.pdf", targetFile.getName()); // contract has no restrictions for institution, PDF should have targetDoc = PDDocument.load(targetFile); assertEquals(227, targetDoc.getDocumentCatalog().getAllPages().size()); }
From source file:dev.ztgnrw.ExtractEmbeddedFiles.java
License:Apache License
/** * This is the main method./* www .j a va2 s . co m*/ * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. */ public static void extractEmbeddedFiles(String file) throws IOException { PDDocument document = null; try { File pdfFile = new File(file); String filePath = pdfFile.getParent() + System.getProperty("file.separator"); document = PDDocument.load(pdfFile); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree != null) { Map<String, PDComplexFileSpecification> names = efTree.getNames(); if (names != null) { extractFiles(names, filePath); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { names = node.getNames(); extractFiles(names, filePath); } } } // extract files from annotations for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment .getFile(); PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); extractFile(filePath, fileSpec.getFilename(), embeddedFile); } } } } finally { if (document != null) { document.close(); } } }
From source file:diagramextractor.DiagramExtractor.java
/** * @param args the command line arguments *///from ww w . j a va 2 s . c o m public static void main(String[] args) throws IOException, COSVisitorException { if (args.length < 2) { showHelp(); System.exit(-1); } List<Integer> diagramOptionsList = new LinkedList<>(); diagramOptionsList = parseOptions(args); List<String> diagramNameList = new LinkedList<>(); diagramNameList = getDiagramNames(diagramOptionsList); File inputDir = new File(args[0]); File[] reports = inputDir.listFiles(); String diagramName = args[1]; PDDocument outputDocument = new PDDocument(); PDFMergerUtility merger = new PDFMergerUtility(); merger.setDestinationFileName("output.pdf"); for (File report : reports) { PDDocument doc = PDDocument.load(report); System.out.println("LOADED FILE: " + report.getName()); int pageNumber = 0; System.out.println("NUMBER OF PAGES: " + doc.getNumberOfPages()); for (int i = 0; i <= doc.getNumberOfPages(); i++) { PDFTextStripper stripper = new PDFTextStripper(); stripper.setStartPage(i); stripper.setEndPage(i); String contents = stripper.getText(doc); boolean containsDiagram = false; for (String diagram : diagramNameList) { if (contents.contains(diagram)) { containsDiagram = true; } } if (containsDiagram && !contents.contains("Table of Contents") && !contents.contains("Table of Figures") && !contents.contains("Obsah") && !contents.contains("Tabulka ?sel")) { pageNumber = i; System.out.println("Diagram found on page: " + pageNumber); PageExtractor extractor = new PageExtractor(doc, pageNumber, pageNumber); PDDocument extractedPage = extractor.extract(); PDPage page = (PDPage) extractedPage.getDocumentCatalog().getAllPages().get(0); PDPageContentStream contentStream = new PDPageContentStream(extractedPage, page, true, true, true); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.moveTextPositionByAmount(100, 50); contentStream.drawString(report.getName()); contentStream.endText(); contentStream.close(); 
merger.appendDocument(outputDocument, extractedPage); } } if (pageNumber == 0) { System.out.println("The diagram " + diagramName + " was not found in file " + report.getName()); } doc.close(); } merger.mergeDocuments(); System.out.println(); System.out.println("Diagrams have been merged."); String outputFileName = generateFilename(inputDir.getCanonicalPath(), "output.pdf"); outputDocument.save(outputFileName); outputDocument.close(); System.out.println("Output file saved as: " + outputFileName); }
From source file:dk.dma.msinm.legacy.nm.ActiveTempPrelimNmPdfExtractor.java
License:Open Source License
/** * Main method for extracting active list of NtM's * @param noticeIds the list of notices to update *///from w w w . j a va 2s. c o m public void extractActiveNoticeIds(List<SeriesIdentifier> noticeIds) throws Exception { PDDocument document = null; try { PDFTextStripper stripper = new PDFTextStripper(); document = PDDocument.load(inputStream); //stripper.setStartPage(1); String text = stripper.getText(document); // Read the text line by line Pattern p = Pattern.compile(ACTIVE_NM_LINE); BufferedReader br = new BufferedReader(new StringReader(text)); String line; while ((line = br.readLine()) != null) { Matcher m = p.matcher(line.trim()); if (m.matches()) { SeriesIdentifier id = new SeriesIdentifier(); id.setMainType(SeriesIdType.NM); id.setYear(year); id.setNumber(Integer.valueOf(m.group(1))); id.setAuthority(organization); noticeIds.add(id); } } } catch (IOException e) { log.error("Error extracting notices from file " + fileName, e); throw e; } finally { if (document != null) { document.close(); } try { inputStream.close(); } catch (Exception ex) { } } }
From source file:dk.dma.msinm.legacy.nm.NmPdfExtractor.java
License:Open Source License
/** * Main method for extracting the NtM's/*from w w w . j av a2s . c om*/ * @param notices the list of notices to update */ public void extractNotices(List<Message> notices) throws Exception { PDDocument document = null; try { PDFTextStripper stripper = new PDFTextStripper(); document = PDDocument.load(inputStream); stripper.setStartPage(3); String text = stripper.getText(document); List<String> textBlocks = extractNoticeTextBlocks(text); extractNotices(notices, textBlocks); } catch (IOException e) { log.error("Error extracting notivces from file " + fileName, e); throw e; } finally { if (document != null) { document.close(); } try { inputStream.close(); } catch (Exception ex) { } } }
From source file:dpfmanager.shell.modules.report.util.ReportPDF.java
License:Open Source License
/**
 * Parse a global report to PDF format.
 *
 * <p>The generated document consists of a header (logo, title, number of processed files),
 * a per-ISO summary, a global pass/fail pie chart, one summary entry per individual report
 * (thumbnail, file name, per-ISO error/warning counts, score chart) and finally the pages of
 * every individual report PDF that is available. The result is saved to {@code pdffile}.
 *
 * <p>{@code pdfParams.y} is used as the running vertical position throughout; it is
 * decreased as content is written further down the page.
 *
 * <p>No exception leaves this method: any {@link Exception} is caught and sent through
 * {@code context} as an {@code ExceptionMessage}.
 *
 * @param pdffile the file name.
 * @param gr      the global report.
 */
public void parseGlobal(String pdffile, GlobalReport gr) {
  try {
    PDFParams pdfParams = new PDFParams();
    pdfParams.init(PDPage.PAGE_SIZE_A4);
    PDFont font = PDType1Font.HELVETICA_BOLD;
    int pos_x = 200;
    pdfParams.y = 700;
    int font_size = 18;
    // Logo
    PDXObjectImage ximage = new PDJpeg(pdfParams.getDocument(), getFileStreamFromResources("images/logo.jpg"));
    // The logo is drawn at 645x300 units divided by this factor.
    float scale = 3;
    pdfParams.getContentStream().drawXObject(ximage, pos_x, pdfParams.y, 645 / scale, 300 / scale);

    // Report Title
    pdfParams.y -= 30;
    pdfParams = writeText(pdfParams, "MULTIPLE REPORT", pos_x, font, font_size);
    pdfParams.y -= 30;
    font_size = 15;
    pdfParams = writeText(pdfParams, "Processed files: " + gr.getIndividualReports().size(), pos_x, font, font_size, Color.cyan);

    // Summary table
    pos_x = 100;
    pdfParams.y -= 15;
    font_size = 8;
    Color col;
    for (String iso : gr.getCheckedIsos()) {
      // A line is written when the ISO is contained in gr.getIsos() or when every report passed it.
      if (gr.getIsos().contains(iso) || gr.getReportsOk(iso) == gr.getReportsCount()) {
        String name = ImplementationCheckerLoader.getIsoName(iso);
        pdfParams.y -= 15;
        // Green only when all reports are ok for this ISO, red otherwise.
        col = gr.getReportsOk(iso) == gr.getReportsCount() ? Color.green : Color.red;
        pdfParams = writeText(pdfParams, gr.getReportsOk(iso) + " files conforms to " + name, pos_x, font, font_size, col);
      }
    }

    // Pie chart
    pdfParams.y += 10;
    // Cap the chart position at y = 565.
    if (pdfParams.y > 565) pdfParams.y = 565;
    pos_x += 200;
    int graph_size = 40;
    // The chart is painted at ten times the size it is finally drawn at.
    BufferedImage image = new BufferedImage(graph_size * 10, graph_size * 10, BufferedImage.TYPE_INT_ARGB);
    Graphics2D g2d = image.createGraphics();
    // Fraction (0..1) of reports that are ok.
    Double doub = (double) gr.getAllReportsOk() / gr.getReportsCount();
    double extent = 360d * doub;
    // Full green disc first, then a red wedge of (360 - extent) degrees painted over it.
    g2d.setColor(Color.green);
    g2d.fill(new Arc2D.Double(0, 0, graph_size * 10, graph_size * 10, 90, 360, Arc2D.PIE));
    g2d.setColor(Color.red);
    g2d.fill(new Arc2D.Double(0, 0, graph_size * 10, graph_size * 10, 90, 360 - extent, Arc2D.PIE));
    ximage = new PDJpeg(pdfParams.getDocument(), image);
    pdfParams.getContentStream().drawXObject(ximage, pos_x, pdfParams.y, graph_size, graph_size);
    pdfParams.y += graph_size - 10;
    font_size = 7;
    pdfParams = writeText(pdfParams, gr.getAllReportsOk() + " passed", pos_x + 50, font, font_size, Color.green);
    pdfParams.y -= 10;
    pdfParams = writeText(pdfParams, gr.getAllReportsKo() + " failed", pos_x + 50, font, font_size, Color.red);
    pdfParams.y -= 10;
    pdfParams = writeText(pdfParams, "Global score " + (int) (doub * 100) + "%", pos_x + 50, font, font_size, Color.black);

    /**
     * Individual Tiff images list
     */
    pos_x = 100;
    pdfParams.y -= 50;
    for (IndividualReport ir : gr.getIndividualReports()) {
      int image_height = 65;
      int image_width = 100;

      // Draw image
      // Pick a temporary JPEG path next to pdffile that does not exist yet.
      String imgPath = pdffile + "img.jpg";
      int ids = 0;
      while (new File(imgPath).exists()) imgPath = pdffile + "img" + ids++ + ".jpg";
      boolean check = tiff2Jpg(ir.getFilePath(), imgPath);
      BufferedImage bimg;
      if (!check) {
        // tiff2Jpg returned false: fall back to the bundled placeholder image.
        bimg = ImageIO.read(getFileStreamFromResources("html/img/noise.jpg"));
      } else {
        bimg = ImageIO.read(new File(imgPath));
      }
      // Keep the aspect ratio: derive the width from the height, then limit the width to 100.
      image_width = image_height * bimg.getWidth() / bimg.getHeight();
      if (image_width > 100) {
        image_width = 100;
        image_height = image_width * bimg.getHeight() / bimg.getWidth();
      }

      // Check if we need new page before draw image
      int maxHeight = getMaxHeight(ir, image_height);
      if (newPageNeeded(pdfParams.y - maxHeight)) {
        pdfParams.setContentStream(newPage(pdfParams.getContentStream(), pdfParams.getDocument()));
        pdfParams.y = init_posy;
      }

      // Remember the top of this entry; text and chart are both positioned relative to it.
      int initialy = pdfParams.y;
      int initialx = 100;

      pdfParams.y -= maxHeight;
      // maxy tracks the lowest y reached by any part of this entry.
      int maxy = pdfParams.y;
      ximage = new PDJpeg(pdfParams.getDocument(), bimg);
      pdfParams.getContentStream().drawXObject(ximage, pos_x, pdfParams.y, image_width, image_height);
      // Remove the temporary JPEG created by tiff2Jpg.
      if (check) new File(imgPath).delete();

      // Values
      // From here on image_width is reused as a fixed horizontal offset (100) for the text column.
      image_width = initialx;
      pdfParams.y = initialy;
      if (maxHeight == 65) {
        pdfParams.y -= 10;
      }
      pdfParams = writeText(pdfParams, ir.getFileName(), pos_x + image_width + 10, font, font_size, Color.gray);
      font_size = 6;
      pdfParams.y -= 10;
      pdfParams = writeText(pdfParams, "Conformance Checker", pos_x + image_width + 10, font, font_size, Color.black);
      pdfParams.getContentStream().drawLine(pos_x + image_width + 10, pdfParams.y - 5, image_width + 150, pdfParams.y - 5);
      pdfParams.y -= 2;

      // Isos table
      for (String iso : ir.getCheckedIsos()) {
        if (ir.hasValidation(iso) || ir.getNErrors(iso) == 0) {
          String name = ImplementationCheckerLoader.getIsoName(iso);
          pdfParams.y -= 10;
          pdfParams = writeText(pdfParams, name, pos_x + image_width + 10, font, font_size, Color.black);
          // Counts are highlighted (red / orange) only when they are greater than zero.
          pdfParams = writeText(pdfParams, ir.getNErrors(iso) + " errors", pos_x + image_width + 110, font, font_size, ir.getNErrors(iso) > 0 ? Color.red : Color.black);
          pdfParams = writeText(pdfParams, ir.getNWarnings(iso) + " warnings", pos_x + image_width + 140, font, font_size, ir.getNWarnings(iso) > 0 ? Color.orange : Color.black);
        }
      }
      if (pdfParams.y < maxy) maxy = pdfParams.y;

      // Chart
      pdfParams.y = initialy;
      pdfParams.y -= 10;
      pdfParams.y -= 10;
      graph_size = 25;
      image = new BufferedImage(graph_size * 10, graph_size * 10, BufferedImage.TYPE_INT_ARGB);
      g2d = image.createGraphics();
      // Here doub is used on a 0..100 scale (divided by 100.0 below, compared with 100 further down).
      doub = (double) ir.calculatePercent();
      extent = 360d * doub / 100.0;
      // Full gray disc first, then a red wedge of (360 - extent) degrees painted over it.
      g2d.setColor(Color.gray);
      g2d.fill(new Arc2D.Double(0, 0, graph_size * 10, graph_size * 10, 90, 360, Arc2D.PIE));
      g2d.setColor(Color.red);
      g2d.fill(new Arc2D.Double(0, 0, graph_size * 10, graph_size * 10, 90, 360 - extent, Arc2D.PIE));
      ximage = new PDJpeg(pdfParams.getDocument(), image);
      pdfParams.getContentStream().drawXObject(ximage, pos_x + image_width + 180, pdfParams.y - graph_size, graph_size, graph_size);
      pdfParams.y += graph_size - 10;
      if (doub < 100) {
        pdfParams.y = pdfParams.y - 10 - graph_size / 2;
        pdfParams = writeText(pdfParams, "Failed", pos_x + image_width + 180 + graph_size + 10, font, font_size, Color.red);
      }
      pdfParams.y = pdfParams.y - 10 - graph_size / 2;
      pdfParams = writeText(pdfParams, "Score " + doub + "%", pos_x + image_width + 180 + graph_size + 10, font, font_size, Color.gray);
      if (pdfParams.y < maxy) maxy = pdfParams.y;

      // Continue 10 units below the lowest point reached by this entry.
      pdfParams.y = maxy - 10;
    }

    // Full individual reports
    // Source documents whose pages were added; they are closed only after the save below.
    ArrayList<PDDocument> toClose = new ArrayList<PDDocument>();
    for (IndividualReport ir : gr.getIndividualReports()) {
      if (!ir.containsData()) continue;
      PDDocument doc = null;
      // Prefer the PDF returned by getPDF(); otherwise load the one from getPDFDocument() if present.
      if (ir.getPDF() != null)
        doc = ir.getPDF();
      else if (ir.getPDFDocument() != null)
        doc = PDDocument.load(ir.getPDFDocument());
      if (doc != null) {
        List<PDPage> l = doc.getDocumentCatalog().getAllPages();
        for (PDPage pag : l) {
          pdfParams.getDocument().addPage(pag);
        }
        toClose.add(doc);
      }
    }

    pdfParams.getContentStream().close();
    pdfParams.getDocument().save(pdffile);
    pdfParams.getDocument().close();
    // NOTE(review): presumably the added pages still reference their source documents until
    // the save has completed, which is why these are closed last - confirm.
    for (PDDocument doc : toClose) {
      doc.close();
    }
  } catch (Exception tfe) {
    // Report the failure through the module context instead of propagating it.
    context.send(BasicConfig.MODULE_MESSAGE, new ExceptionMessage("Exception in ReportPDF", tfe));
  }
}