List of usage examples for the org.apache.pdfbox.pdmodel.PDDocument.load method
public static PDDocument load(byte[] input) throws IOException
From source file:org.nuxeo.pdf.test.PDFWatermarkingTest.java
License:Open Source License
/**
 * Asserts that the checked pages of the PDF read from {@code inBlob} each hold at least one
 * image XObject whose dimensions are exactly {@code inExpectedWidth} x {@code inExpectedHeight}.
 *
 * @param inBlob the blob whose stream is loaded as a PDF
 * @param inExpectedWidth the width the image must have
 * @param inExpectedHeight the height the image must have
 * @throws Exception if the PDF cannot be loaded or closed
 */
protected void checkHasImage(Blob inBlob, int inExpectedWidth, int inExpectedHeight) throws Exception {
    PDDocument doc = PDDocument.load(inBlob.getStream());
    utils.track(doc);
    try {
        List<?> allPages = doc.getDocumentCatalog().getAllPages();
        int max = allPages.size();
        // NOTE(review): iteration starts at index 1, so the page at index 0 is never
        // checked. Left unchanged here; confirm whether skipping it is intentional.
        for (int i = 1; i < max; i++) {
            PDPage page = (PDPage) allPages.get(i);
            PDResources pdResources = page.getResources();
            Map<String, PDXObject> allXObjects = pdResources.getXObjects();
            assertNotNull(allXObjects);

            boolean gotIt = false;
            for (PDXObject xobject : allXObjects.values()) {
                if (xobject instanceof PDXObjectImage) {
                    PDXObjectImage pdxObjectImage = (PDXObjectImage) xobject;
                    if (inExpectedWidth == pdxObjectImage.getWidth()
                            && inExpectedHeight == pdxObjectImage.getHeight()) {
                        gotIt = true;
                        break;
                    }
                }
            }
            assertTrue("Page " + i + " does not have the image", gotIt);
        }
    } finally {
        // Close even when an assertion fails, and untrack even when close() fails.
        try {
            doc.close();
        } finally {
            utils.untrack(doc);
        }
    }
}
From source file:org.nuxeo.webpage.archiver.test.Utils.java
License:Open Source License
public static boolean hasText(Blob inBlob, String inText) throws IOException { boolean hasIt = false; PDDocument pdfDoc = null;//from w w w . j a v a2 s . com try { pdfDoc = PDDocument.load(inBlob.getFile()); PDFTextStripper stripper = new PDFTextStripper(); String txt; int max = pdfDoc.getNumberOfPages(); for (int i = 1; i <= max; ++i) { stripper.setStartPage(i); stripper.setEndPage(i); txt = stripper.getText(pdfDoc); if (txt.indexOf(inText) > -1) { hasIt = true; break; } } } finally { if (pdfDoc != null) { pdfDoc.close(); } } return hasIt; }
From source file:org.nuxeo.webpage.archiver.WebpageToBlob.java
License:Apache License
protected boolean pdfLooksValid(File inPdf) { boolean valid = false; if (inPdf.exists() && inPdf.length() > 0) { PDDocument pdfDoc = null;/* w w w.j a va2s .com*/ try { pdfDoc = PDDocument.load(inPdf); if (pdfDoc.getNumberOfPages() > 0) { valid = true; } } catch (IOException e) { // Nothing } finally { if (pdfDoc != null) { try { pdfDoc.close(); } catch (IOException e) { // Ignore } } } } return valid; }
From source file:org.ochan.control.ThumbnailController.java
License:Open Source License
@SuppressWarnings("unchecked") private BufferedImage takeCaptureOfPDFPage1(byte[] data) { try {/* w w w . j ava 2 s . c om*/ ByteArrayInputStream bais = new ByteArrayInputStream(data); PDDocument document = PDDocument.load(bais); // get the first page. List<PDPage> pages = (List<PDPage>) document.getDocumentCatalog().getAllPages(); PDPage page = pages.get(0); BufferedImage image = page.convertToImage(); document.close(); return image; } catch (Exception e) { LOG.error("Unable to convert pdf page 1 into godlike image", e); } return null; }
From source file:org.olanto.converter.ConverterFactoryPDF.java
License:Open Source License
@Override public void startConvertion() { _logger.debug("Start converting " + source.getName()); boolean sort = true; String fixEncoding = "UTF-8"; Charset encoding = Charset.forName(fixEncoding); int startPage = 1; int endPage = Integer.MAX_VALUE; Writer output = null;//from ww w . jav a 2 s .co m Writer outputFile = null; PDDocument document = null; try { document = PDDocument.load(source); // if (document.isEncrypted()) { // _logger.warn("Try to extract text from encrypted document :" + source.getAbsolutePath()); // document.decrypt(password); // } PDFTextStripper stripper = null; if (outputFormat.equalsIgnoreCase(Constants.HTML) || outputFormat.equalsIgnoreCase(Constants.HTM)) { stripper = new PDFText2HTML(fixEncoding); } else if (outputFormat.equalsIgnoreCase(Constants.TXT)) { stripper = new PDFTextStripper(fixEncoding); } else { _logger.warn("Could not convert PDF file to " + outputFormat); } if (stripper != null) { _logger.debug("open stripper : " + encoding.name()); stripper.setSortByPosition(sort); stripper.setStartPage(startPage); stripper.setEndPage(endPage); ByteArrayOutputStream buf = new ByteArrayOutputStream(); output = new OutputStreamWriter(buf, encoding); output.write(stripper.getText(document)); output.flush(); outputFile = new OutputStreamWriter(new FileOutputStream(target), encoding); outputFile.write(buf.toString(encoding.name())); buf = null; success = true; _logger.debug("(writed) file : " + target.getAbsolutePath()); } else { _logger.debug("Destination format not yet implemented"); } // } catch (CryptographyException ex) { // _logger.error("(Error while decripting) file : " + source.getAbsolutePath(), ex); // } catch (InvalidPasswordException ex) { // _logger.error("(Bad password) file : " + source.getAbsolutePath(), ex); } catch (IOException ex) { _logger.error("(rejected cannot be opened) file : " + source.getAbsolutePath(), ex); success = false; } catch (Exception ex) { _logger.error("(rejected) file : " + source.getAbsolutePath(), ex); 
success = false; } finally { try { _logger.debug("(closing) file : " + target.getAbsolutePath()); if (outputFile != null) { outputFile.close(); } if (output != null) { output.close(); } if (document != null) { document.close(); } } catch (IOException e) { _logger.error("Could not close document", e); } } }
From source file:org.olat.core.commons.services.image.spi.ImageHelperImpl.java
License:Apache License
@Override public Size thumbnailPDF(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight) { InputStream in = null;//w ww . jav a 2 s. c om PDDocument document = null; try { WorkThreadInformations.setInfoFiles(null, pdfFile); WorkThreadInformations.set("Generate thumbnail VFSLeaf=" + pdfFile); in = pdfFile.getInputStream(); document = PDDocument.load(in); if (document.isEncrypted()) { try { document.decrypt(""); } catch (Exception e) { log.info("PDF document is encrypted: " + pdfFile); throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile); } } List pages = document.getDocumentCatalog().getAllPages(); PDPage page = (PDPage) pages.get(0); BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_BGR, 72); Size size = scaleImage(image, thumbnailFile, maxWidth, maxHeight); if (size != null) { return size; } return null; } catch (CannotGenerateThumbnailException e) { return null; } catch (Exception e) { log.warn("Unable to create image from pdf file.", e); return null; } finally { WorkThreadInformations.unset(); FileUtils.closeSafely(in); if (document != null) { try { document.close(); } catch (IOException e) { //only a try, fail silently } } } }
From source file:org.olat.core.commons.services.thumbnail.impl.PDFToThumbnail.java
License:Apache License
@Override public FinalSize generateThumbnail(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight) throws CannotGenerateThumbnailException { InputStream in = null;/*from ww w . jav a2s.co m*/ PDDocument document = null; try { in = pdfFile.getInputStream(); document = PDDocument.load(in); if (document.isEncrypted()) { try { document.decrypt(""); } catch (Exception e) { log.info("PDF document is encrypted: " + pdfFile); throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile); } } List pages = document.getDocumentCatalog().getAllPages(); PDPage page = (PDPage) pages.get(0); BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_BGR, 72); Size size = ImageHelper.scaleImage(image, thumbnailFile, maxWidth, maxHeight); return new FinalSize(size.getWidth(), size.getWidth()); } catch (CannotGenerateThumbnailException e) { throw e; } catch (Exception e) { log.warn("Unable to create image from pdf file.", e); throw new CannotGenerateThumbnailException(e); } finally { FileUtils.closeSafely(in); if (document != null) { try { document.close(); } catch (IOException e) { // only a try, fail silently } } } }
From source file:org.olat.course.certificate.manager.CertificatePDFFormWorker.java
License:Apache License
public File fill(CertificateTemplate template, File destinationDir, String certificateFilename) { PDDocument document = null;//www . ja v a 2 s . c o m InputStream templateStream = null; try { File templateFile = null; if (template != null) { templateFile = certificatesManager.getTemplateFile(template); } if (templateFile != null && templateFile.exists()) { templateStream = new FileInputStream(templateFile); } else { templateStream = CertificatesManager.class.getResourceAsStream("template.pdf"); } document = PDDocument.load(templateStream); PDDocumentCatalog docCatalog = document.getDocumentCatalog(); PDAcroForm acroForm = docCatalog.getAcroForm(); if (acroForm != null) { fillUserProperties(acroForm); fillRepositoryEntry(acroForm); fillCertificationInfos(acroForm); fillAssessmentInfos(acroForm); } if (!destinationDir.exists()) { destinationDir.mkdirs(); } File certificateFile = new File(destinationDir, certificateFilename); OutputStream out = new FileOutputStream(certificateFile); document.save(out); out.flush(); out.close(); return certificateFile; } catch (Exception e) { log.error("", e); return null; } finally { IOUtils.closeQuietly(document); IOUtils.closeQuietly(templateStream); } }
From source file:org.olat.course.certificate.ui.UploadCertificateController.java
License:Apache License
private boolean validatePdf(File template) { boolean allOk = true; PDDocument document = null;// www . jav a 2s . c om try (InputStream in = Files.newInputStream(template.toPath())) { document = PDDocument.load(in); if (document.isEncrypted()) { fileEl.setErrorKey("upload.error.encrypted", null); allOk &= false; } else { //check if we can write the form PDDocumentCatalog docCatalog = document.getDocumentCatalog(); PDAcroForm acroForm = docCatalog.getAcroForm(); if (acroForm != null) { @SuppressWarnings("unchecked") List<PDField> fields = acroForm.getFields(); for (PDField field : fields) { field.setValue("test"); } } document.save(new DevNullOutputStream()); } } catch (IOException ex) { logError("", ex); if (ex.getMessage() != null && ex.getMessage().contains("Don't know how to calculate the position for non-simple fonts")) { fileEl.setErrorKey("upload.error.simplefonts", null); } else { fileEl.setErrorKey("upload.unkown.error", null); } allOk &= false; } catch (Exception ex) { logError("", ex); fileEl.setErrorKey("upload.unkown.error", null); allOk &= false; } finally { IOUtils.closeQuietly(document); } return allOk; }
From source file:org.olat.search.service.document.file.pdf.PdfBoxExtractor.java
License:Apache License
/**
 * Extracts the title and text content of the PDF stored in {@code leaf}. An encrypted
 * document is first decrypted with an empty password; if that fails, the returned content
 * holds only the leaf's name.
 *
 * @param leaf the PDF file to read
 * @return the extracted content
 * @throws IOException if the PDF cannot be read or closed
 * @throws DocumentAccessException declared by the signature; not thrown directly in this method
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug())
        log.debug("readContent from pdf starts...");
    PDDocument document = null;
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        document = PDDocument.load(bis);
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (Exception e) {
                log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
                writer.append(leaf.getName());
                writer.close();
                return new FileContent(leaf.getName(), writer.toString());
            }
        }
        String title = getTitle(document);
        if (log.isDebug())
            log.debug("readContent PDDocument loaded");
        PDFTextStripper stripper = new PDFTextStripper();
        LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
        stripper.writeText(document, writer);
        writer.close();
        return new FileContent(title, writer.toString());
    } finally {
        // Nested so the stream is closed even when closing the document throws.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            if (bis != null) {
                bis.close();
            }
        }
    }
}