List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.close()
@Override public void close() throws IOException
From source file:de.uni_koeln.ub.drc.reader.PdfContentExtractor.java
License:Open Source License
/**
 * Loads a PDF file and converts it into a {@code PageInfo}.
 *
 * <p>The given path is also stored in the {@code location} field before the
 * document is loaded.
 *
 * @param pdfName
 *            The full path to the PDF file to extract content from
 * @return The PageInfo object for the PDF, or {@code null} if an
 *         {@link IOException} occurred while loading, extracting or closing
 *         the document (the stack trace is printed in that case)
 */
public static PageInfo extractContentFromPdf(String pdfName) {
    try {
        location = pdfName;
        PDDocument document = PDDocument.load(new File(pdfName));
        try {
            PdfContentExtractor x = initExtractor(document);
            return x.toPageInfo();
        } finally {
            // Close on every path: previously the document stayed open
            // whenever extraction threw.
            document.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
/**
 * Creates a thumbnail of the first page of a PDF file.
 *
 * <p>Any existing {@code output} file is deleted first. The image obtained
 * from {@code writeImageForPage} is written directly when its width already
 * equals {@code thumbWidth}; otherwise it is passed through a
 * {@code ResizeImage} configured with {@code NO_RESIZE_ONLY_CROP}.
 *
 * @param input  the PDF file to read
 * @param output the file the thumbnail is written to
 * @throws IOException          if producing or writing the image fails
 * @throws ThumbnailerException if the PDF cannot be loaded
 */
@Override
public void generateThumbnail(final File input, final File output) throws IOException, ThumbnailerException {
    FileUtils.deleteQuietly(output);

    PDDocument pdf = null;
    try {
        try {
            pdf = PDDocument.load(input);
        } catch (final IOException loadFailure) {
            throw new ThumbnailerException("Could not load PDF File", loadFailure);
        }

        final List<?> allPages = pdf.getDocumentCatalog().getAllPages();
        final PDPage firstPage = (PDPage) allPages.get(0);
        final BufferedImage pageImage = this.writeImageForPage(pdf, firstPage, BufferedImage.TYPE_INT_RGB);

        final boolean needsCrop = pageImage.getWidth() != this.thumbWidth;
        if (needsCrop) {
            final ResizeImage cropper = new ResizeImage(this.thumbWidth, this.thumbHeight);
            cropper.resizeMethod = ResizeImage.NO_RESIZE_ONLY_CROP;
            cropper.setInputImage(pageImage);
            cropper.writeOutput(output);
        } else {
            ImageIO.write(pageImage, PDFBoxThumbnailer.OUTPUT_FORMAT, output);
        }
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (final IOException ignored) {
                // Best effort: a failure while closing must not mask the outcome above.
            }
        }
    }
}
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
@SuppressWarnings("unchecked") @Override//from ww w . j a v a 2 s . co m public void generateThumbnails(final File input, final File outputFolder) throws IOException, ThumbnailerException { PDDocument document = null; try { try { document = PDDocument.load(input); } catch (final IOException e) { throw new ThumbnailerException("Could not load PDF File", e); } final List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); int pageNumber = 0; for (final PDPage page : allPages) { final BufferedImage image = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB); final File outputFile = ThumbnailNamer.getFile(outputFolder, pageNumber); if (image.getWidth() == this.thumbWidth) { ImageIO.write(image, PDFBoxThumbnailer.OUTPUT_FORMAT, outputFile); } else { final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight); resizer.resizeMethod = ResizeImage.RESIZE_FIT_BOTH_DIMENSIONS; resizer.setInputImage(image); resizer.writeOutput(outputFile); } pageNumber++; } } finally { if (document != null) { try { document.close(); } catch (final IOException e) { // swallow exception on closing. } } } }
From source file:de.uzk.hki.da.convert.PdfService.java
License:Open Source License
/** * Generates a new PDF which only contains certainPages of the original document. * Users can choose if they wish to reduce to a number of pages (beginning from the first page) * or to a certain set of pages. Both options can be used at the same time. * By setting one of the parameters to either "" or null any of the options the according * option will not be used./*from w ww . j av a2 s . c om*/ * * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document). * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document. * * @throws IOException * @author Jens Peters * @author Sebastian Cuy * @author Daniel M. de Oliveira */ public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException { PDDocument srcPdf = null; PDDocument targetPdf = null; if (srcPdfFile == null) throw new IllegalStateException("srcFile not set"); srcPdf = PDDocument.load(srcPdfFile); targetPdf = new PDDocument(); @SuppressWarnings("rawtypes") List srcPages = srcPdf.getDocumentCatalog().getAllPages(); int numberOfPages = 0; if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) { numberOfPages = Integer.parseInt(numberOfPagesText); for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (StringUtilities.isNotSet(numberOfPagesText) && StringUtilities.isNotSet(certainPagesText)) { for (int i = 0; i < srcPages.size(); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (certainPagesText != null && !certainPagesText.isEmpty()) { String[] certainPagesTexts = certainPagesText.split(" "); int[] certainPages = new int[certainPagesTexts.length]; for (int i = 0; i < certainPagesTexts.length; i++) { certainPages[i] = Integer.parseInt(certainPagesTexts[i]); } Arrays.sort(certainPages); for (int i = 0; i < certainPages.length; i++) { if (certainPages[i] > numberOfPages && 
srcPages.size() > certainPages[i] - 1) targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1)); } } try { targetPdf.save(targetPdfFile); } catch (Exception e) { throw new RuntimeException("Unable to create PDF!", e); } finally { targetPdf.close(); srcPdf.close(); } }
From source file:de.uzk.hki.da.format.PdfService.java
License:Open Source License
/** * Generates a new PDF which only contains certainPages of the original document. * Users can choose if they wish to reduce to a number of pages (beginning from the first page) * or to a certain set of pages. Both options can be used at the same time. * By setting one of the parameters to either "" or null any of the options the according * option will not be used./* www . ja v a 2s .c om*/ * * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document). * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document. * * @throws IOException * @author Jens Peters * @author Sebastian Cuy * @author Daniel M. de Oliveira */ public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException { PDDocument srcPdf = null; PDDocument targetPdf = null; if (srcPdfFile == null) throw new IllegalStateException("srcFile not set"); srcPdf = PDDocument.load(srcPdfFile); targetPdf = new PDDocument(); @SuppressWarnings("rawtypes") List srcPages = srcPdf.getDocumentCatalog().getAllPages(); int numberOfPages = 0; if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) { numberOfPages = Integer.parseInt(numberOfPagesText); for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++) targetPdf.addPage((PDPage) srcPages.get(i)); } if (certainPagesText != null && !certainPagesText.isEmpty()) { String[] certainPagesTexts = certainPagesText.split(" "); int[] certainPages = new int[certainPagesTexts.length]; for (int i = 0; i < certainPagesTexts.length; i++) { certainPages[i] = Integer.parseInt(certainPagesTexts[i]); } Arrays.sort(certainPages); for (int i = 0; i < certainPages.length; i++) { if (certainPages[i] > numberOfPages && srcPages.size() > certainPages[i] - 1) targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1)); } } try { targetPdf.save(targetPdfFile); } catch (Exception e) { throw new 
RuntimeException("Unable to create PDF!", e); } finally { targetPdf.close(); } }
From source file:dev.ztgnrw.ExtractEmbeddedFiles.java
License:Apache License
/** * This is the main method.//w w w .j a va 2 s . c o m * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. */ public static void extractEmbeddedFiles(String file) throws IOException { PDDocument document = null; try { File pdfFile = new File(file); String filePath = pdfFile.getParent() + System.getProperty("file.separator"); document = PDDocument.load(pdfFile); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree != null) { Map<String, PDComplexFileSpecification> names = efTree.getNames(); if (names != null) { extractFiles(names, filePath); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { names = node.getNames(); extractFiles(names, filePath); } } } // extract files from annotations for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment .getFile(); PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); extractFile(filePath, fileSpec.getFilename(), embeddedFile); } } } } finally { if (document != null) { document.close(); } } }
From source file:diagramextractor.DiagramExtractor.java
/** * @param args the command line arguments */// w w w . ja v a2s . c o m public static void main(String[] args) throws IOException, COSVisitorException { if (args.length < 2) { showHelp(); System.exit(-1); } List<Integer> diagramOptionsList = new LinkedList<>(); diagramOptionsList = parseOptions(args); List<String> diagramNameList = new LinkedList<>(); diagramNameList = getDiagramNames(diagramOptionsList); File inputDir = new File(args[0]); File[] reports = inputDir.listFiles(); String diagramName = args[1]; PDDocument outputDocument = new PDDocument(); PDFMergerUtility merger = new PDFMergerUtility(); merger.setDestinationFileName("output.pdf"); for (File report : reports) { PDDocument doc = PDDocument.load(report); System.out.println("LOADED FILE: " + report.getName()); int pageNumber = 0; System.out.println("NUMBER OF PAGES: " + doc.getNumberOfPages()); for (int i = 0; i <= doc.getNumberOfPages(); i++) { PDFTextStripper stripper = new PDFTextStripper(); stripper.setStartPage(i); stripper.setEndPage(i); String contents = stripper.getText(doc); boolean containsDiagram = false; for (String diagram : diagramNameList) { if (contents.contains(diagram)) { containsDiagram = true; } } if (containsDiagram && !contents.contains("Table of Contents") && !contents.contains("Table of Figures") && !contents.contains("Obsah") && !contents.contains("Tabulka ?sel")) { pageNumber = i; System.out.println("Diagram found on page: " + pageNumber); PageExtractor extractor = new PageExtractor(doc, pageNumber, pageNumber); PDDocument extractedPage = extractor.extract(); PDPage page = (PDPage) extractedPage.getDocumentCatalog().getAllPages().get(0); PDPageContentStream contentStream = new PDPageContentStream(extractedPage, page, true, true, true); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.moveTextPositionByAmount(100, 50); contentStream.drawString(report.getName()); contentStream.endText(); contentStream.close(); 
merger.appendDocument(outputDocument, extractedPage); } } if (pageNumber == 0) { System.out.println("The diagram " + diagramName + " was not found in file " + report.getName()); } doc.close(); } merger.mergeDocuments(); System.out.println(); System.out.println("Diagrams have been merged."); String outputFileName = generateFilename(inputDir.getCanonicalPath(), "output.pdf"); outputDocument.save(outputFileName); outputDocument.close(); System.out.println("Output file saved as: " + outputFileName); }
From source file:dk.defxws.fedoragsearch.server.TransformerToText.java
License:Open Source License
/**
 * Closes the given document, ignoring a {@code null} argument and any
 * {@link IOException} raised by the close call.
 *
 * @param pdDoc the document to close; may be {@code null}
 */
private void closePDDocument(PDDocument pdDoc) {
    if (pdDoc == null) {
        return;
    }
    try {
        pdDoc.close();
    } catch (IOException ignored) {
        // Best effort: a failure while closing is deliberately not reported.
    }
}
From source file:dk.dma.msinm.legacy.nm.ActiveTempPrelimNmPdfExtractor.java
License:Open Source License
/** * Main method for extracting active list of NtM's * @param noticeIds the list of notices to update *///w ww . j ava 2s . c om public void extractActiveNoticeIds(List<SeriesIdentifier> noticeIds) throws Exception { PDDocument document = null; try { PDFTextStripper stripper = new PDFTextStripper(); document = PDDocument.load(inputStream); //stripper.setStartPage(1); String text = stripper.getText(document); // Read the text line by line Pattern p = Pattern.compile(ACTIVE_NM_LINE); BufferedReader br = new BufferedReader(new StringReader(text)); String line; while ((line = br.readLine()) != null) { Matcher m = p.matcher(line.trim()); if (m.matches()) { SeriesIdentifier id = new SeriesIdentifier(); id.setMainType(SeriesIdType.NM); id.setYear(year); id.setNumber(Integer.valueOf(m.group(1))); id.setAuthority(organization); noticeIds.add(id); } } } catch (IOException e) { log.error("Error extracting notices from file " + fileName, e); throw e; } finally { if (document != null) { document.close(); } try { inputStream.close(); } catch (Exception ex) { } } }
From source file:dk.dma.msinm.legacy.nm.NmPdfExtractor.java
License:Open Source License
/**
 * Main method for extracting the NtM's.
 *
 * <p>Reads the PDF from {@code inputStream} as text starting at page 3,
 * splits it with {@code extractNoticeTextBlocks} and passes the blocks to
 * {@code extractNotices(notices, textBlocks)}. The document and
 * {@code inputStream} are closed before returning.
 *
 * @param notices the list of notices to update
 * @throws Exception if the PDF cannot be read (an {@link IOException} is
 *                   logged and rethrown) or block processing fails
 */
public void extractNotices(List<Message> notices) throws Exception {

    PDDocument document = null;
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        document = PDDocument.load(inputStream);
        stripper.setStartPage(3);
        String text = stripper.getText(document);

        List<String> textBlocks = extractNoticeTextBlocks(text);
        extractNotices(notices, textBlocks);

    } catch (IOException e) {
        log.error("Error extracting notices from file " + fileName, e);
        throw e;
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            // Nested so the stream is closed even when closing the document throws.
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Best effort: nothing useful can be done if the stream fails to close.
            }
        }
    }
}