Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

This page collects usage examples for the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.nuxeo.pdf.test.PDFWatermarkingTest.java

License:Open Source License

/**
 * Asserts that the checked pages of the PDF read from {@code inBlob} each hold an image XObject
 * whose dimensions are exactly {@code inExpectedWidth} x {@code inExpectedHeight}.
 *
 * @param inBlob the blob whose stream is parsed as a PDF
 * @param inExpectedWidth the width the embedded image must have
 * @param inExpectedHeight the height the embedded image must have
 * @throws Exception if the PDF cannot be loaded or closed
 */
protected void checkHasImage(Blob inBlob, int inExpectedWidth, int inExpectedHeight) throws Exception {

    PDDocument doc = PDDocument.load(inBlob.getStream());
    utils.track(doc);

    List<?> allPages = doc.getDocumentCatalog().getAllPages();
    int max = allPages.size();
    // NOTE(review): the index starts at 1, so the entry at index 0 of the page list is never
    // checked. Kept as-is; confirm whether skipping the first page is intentional.
    for (int i = 1; i < max; i++) {
        PDPage page = (PDPage) allPages.get(i);

        PDResources pdResources = page.getResources();
        Map<String, PDXObject> allXObjects = pdResources.getXObjects();
        assertNotNull(allXObjects);

        boolean gotIt = false;
        for (PDXObject xobject : allXObjects.values()) {
            if (xobject instanceof PDXObjectImage) {
                PDXObjectImage pdxObjectImage = (PDXObjectImage) xobject;
                if (inExpectedWidth == pdxObjectImage.getWidth()
                        && inExpectedHeight == pdxObjectImage.getHeight()) {
                    gotIt = true;
                    break;
                }
            }
        }
        // The message previously lacked the space before "does".
        assertTrue("Page " + i + " does not have the image", gotIt);
    }

    doc.close();
    utils.untrack(doc);
}

From source file:org.nuxeo.webpage.archiver.test.Utils.java

License:Open Source License

/**
 * Tells whether the PDF loaded from the file of {@code inBlob} contains {@code inText}.
 * <p>
 * Text is extracted one page at a time and the scan stops at the first page whose text
 * contains the searched string. The document is always closed before returning.
 *
 * @param inBlob the blob whose file is parsed as a PDF
 * @param inText the text to look for
 * @return {@code true} if at least one page contains {@code inText}
 * @throws IOException if loading, text extraction or closing fails
 */
public static boolean hasText(Blob inBlob, String inText) throws IOException {

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(inBlob.getFile());
        PDFTextStripper textStripper = new PDFTextStripper();
        int pageCount = pdf.getNumberOfPages();

        // The stripper is restricted to a single page per pass (page numbers start at 1 here).
        int pageNumber = 1;
        while (pageNumber <= pageCount) {
            textStripper.setStartPage(pageNumber);
            textStripper.setEndPage(pageNumber);
            String pageText = textStripper.getText(pdf);
            if (pageText.contains(inText)) {
                return true;
            }
            ++pageNumber;
        }
        return false;
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.nuxeo.webpage.archiver.WebpageToBlob.java

License:Apache License

/**
 * Best-effort sanity check of a PDF file: it must exist, be non-empty, be loadable and report
 * at least one page. Any {@link IOException} raised while loading or closing is swallowed and
 * simply results in the file not being reported as valid.
 *
 * @param inPdf the file to check
 * @return {@code true} only if the file could be loaded and has at least one page
 */
protected boolean pdfLooksValid(File inPdf) {

    if (!inPdf.exists() || inPdf.length() <= 0) {
        return false;
    }

    boolean hasPages = false;
    PDDocument loaded = null;
    try {
        loaded = PDDocument.load(inPdf);
        hasPages = loaded.getNumberOfPages() > 0;
    } catch (IOException ignored) {
        // An unreadable file is just "not valid"; nothing to report.
    } finally {
        if (loaded != null) {
            try {
                loaded.close();
            } catch (IOException ignored) {
                // Closing is best-effort as well.
            }
        }
    }

    return hasPages;
}

From source file:org.ochan.control.ThumbnailController.java

License:Open Source License

/**
 * Renders the first page of the PDF held in {@code data} as an image.
 * <p>
 * Any failure (unparsable data, no page, rendering or closing error) is logged and reported
 * as {@code null}. The document is closed even when page lookup or rendering fails.
 *
 * @param data the raw bytes of the PDF
 * @return the rendered first page, or {@code null} if it could not be produced
 */
@SuppressWarnings("unchecked")
private BufferedImage takeCaptureOfPDFPage1(byte[] data) {
    try {
        PDDocument document = PDDocument.load(new ByteArrayInputStream(data));
        try {
            // get the first page.
            List<PDPage> pages = (List<PDPage>) document.getDocumentCatalog().getAllPages();
            PDPage page = pages.get(0);
            return page.convertToImage();
        } finally {
            // Previously the document was only closed on the success path.
            document.close();
        }
    } catch (Exception e) {
        LOG.error("Unable to convert pdf page 1 into godlike image", e);
    }
    return null;
}

From source file:org.olanto.converter.ConverterFactoryPDF.java

License:Open Source License

/**
 * Extracts the text of the {@code source} PDF and writes it, UTF-8 encoded, to {@code target}.
 * <p>
 * The output is HTML when {@code outputFormat} is HTML/HTM and plain text when it is TXT; any
 * other format is logged and nothing is written. {@code success} is set to {@code true} only
 * after the target file has been written and closed, and to {@code false} when an exception
 * is caught. No decryption is attempted in this method.
 */
@Override
public void startConvertion() {
    _logger.debug("Start converting " + source.getName());

    boolean sort = true;
    String fixEncoding = "UTF-8";
    Charset encoding = Charset.forName(fixEncoding);
    int startPage = 1;
    int endPage = Integer.MAX_VALUE;

    Writer outputFile = null;
    PDDocument document = null;
    try {
        document = PDDocument.load(source);

        PDFTextStripper stripper = null;
        if (outputFormat.equalsIgnoreCase(Constants.HTML) || outputFormat.equalsIgnoreCase(Constants.HTM)) {
            stripper = new PDFText2HTML(fixEncoding);
        } else if (outputFormat.equalsIgnoreCase(Constants.TXT)) {
            stripper = new PDFTextStripper(fixEncoding);
        } else {
            _logger.warn("Could not convert PDF file to " + outputFormat);
        }

        if (stripper != null) {
            _logger.debug("open stripper : " + encoding.name());
            stripper.setSortByPosition(sort);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);

            // Extract everything first so the target file is only created once the text exists.
            String text = stripper.getText(document);

            outputFile = new OutputStreamWriter(new FileOutputStream(target), encoding);
            outputFile.write(text);
            // Close before flagging success: a failed flush/close must reach the catch below.
            outputFile.close();
            outputFile = null;

            success = true;
            _logger.debug("(writed) file : " + target.getAbsolutePath());
        } else {
            _logger.debug("Destination format not yet implemented");
        }
    } catch (IOException ex) {
        _logger.error("(rejected cannot be opened) file : " + source.getAbsolutePath(), ex);
        success = false;
    } catch (Exception ex) {
        _logger.error("(rejected) file : " + source.getAbsolutePath(), ex);
        success = false;
    } finally {
        _logger.debug("(closing) file : " + target.getAbsolutePath());
        // Each resource is closed on its own so one failing close cannot leak the other.
        if (outputFile != null) {
            try {
                outputFile.close();
            } catch (IOException e) {
                _logger.error("Could not close output file", e);
            }
        }
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                _logger.error("Could not close document", e);
            }
        }
    }
}

From source file:org.olat.core.commons.services.image.spi.ImageHelperImpl.java

License:Apache License

/**
 * Renders the first page of {@code pdfFile} at 72 dpi and scales it into {@code thumbnailFile}.
 * <p>
 * An encrypted document is tried with an empty password; if that fails, or if anything else
 * goes wrong, {@code null} is returned instead of an exception.
 *
 * @param pdfFile the PDF to render
 * @param thumbnailFile the leaf passed to {@code scaleImage} as destination
 * @param maxWidth the maximum width handed to {@code scaleImage}
 * @param maxHeight the maximum height handed to {@code scaleImage}
 * @return the size returned by {@code scaleImage}, or {@code null} on failure
 */
@Override
public Size thumbnailPDF(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight) {
    InputStream pdfStream = null;
    PDDocument pdf = null;
    try {
        WorkThreadInformations.setInfoFiles(null, pdfFile);
        WorkThreadInformations.set("Generate thumbnail VFSLeaf=" + pdfFile);

        pdfStream = pdfFile.getInputStream();
        pdf = PDDocument.load(pdfStream);
        if (pdf.isEncrypted()) {
            try {
                pdf.decrypt("");
            } catch (Exception e) {
                log.info("PDF document is encrypted: " + pdfFile);
                throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile);
            }
        }

        PDPage firstPage = (PDPage) pdf.getDocumentCatalog().getAllPages().get(0);
        BufferedImage rendered = firstPage.convertToImage(BufferedImage.TYPE_INT_BGR, 72);
        return scaleImage(rendered, thumbnailFile, maxWidth, maxHeight);
    } catch (CannotGenerateThumbnailException e) {
        // Already logged where it was raised.
        return null;
    } catch (Exception e) {
        log.warn("Unable to create image from pdf file.", e);
        return null;
    } finally {
        WorkThreadInformations.unset();
        FileUtils.closeSafely(pdfStream);
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                // closing is best-effort, fail silently
            }
        }
    }
}

From source file:org.olat.core.commons.services.thumbnail.impl.PDFToThumbnail.java

License:Apache License

/**
 * Renders the first page of {@code pdfFile} at 72 dpi, scales it into {@code thumbnailFile}
 * and reports the resulting dimensions.
 * <p>
 * An encrypted document is tried with an empty password. The input stream and the document
 * are always closed.
 *
 * @param pdfFile the PDF to render
 * @param thumbnailFile the leaf passed to {@code ImageHelper.scaleImage} as destination
 * @param maxWidth the maximum width handed to {@code ImageHelper.scaleImage}
 * @param maxHeight the maximum height handed to {@code ImageHelper.scaleImage}
 * @return the width and height of the scaled image
 * @throws CannotGenerateThumbnailException if the document stays encrypted, scaling yields no
 *         size, or any other error occurs
 */
@Override
public FinalSize generateThumbnail(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight)
        throws CannotGenerateThumbnailException {
    InputStream in = null;
    PDDocument document = null;
    try {
        in = pdfFile.getInputStream();
        document = PDDocument.load(in);
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (Exception e) {
                log.info("PDF document is encrypted: " + pdfFile);
                throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile);
            }
        }
        List pages = document.getDocumentCatalog().getAllPages();
        PDPage page = (PDPage) pages.get(0);
        BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_BGR, 72);
        Size size = ImageHelper.scaleImage(image, thumbnailFile, maxWidth, maxHeight);
        if (size == null) {
            // Report a missing size explicitly instead of through a NullPointerException.
            log.warn("Unable to scale image from pdf file: " + pdfFile);
            throw new CannotGenerateThumbnailException("Unable to scale image from pdf file: " + pdfFile);
        }
        // The height was previously filled with the width.
        return new FinalSize(size.getWidth(), size.getHeight());

    } catch (CannotGenerateThumbnailException e) {
        throw e;
    } catch (Exception e) {
        log.warn("Unable to create image from pdf file.", e);
        throw new CannotGenerateThumbnailException(e);
    } finally {
        FileUtils.closeSafely(in);
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                // only a try, fail silently
            }
        }
    }
}

From source file:org.olat.course.certificate.manager.CertificatePDFFormWorker.java

License:Apache License

/**
 * Fills the form of a certificate template and saves the result as
 * {@code certificateFilename} inside {@code destinationDir}.
 * <p>
 * The template file resolved through {@code certificatesManager} is used when it exists;
 * otherwise the {@code template.pdf} class-path resource is loaded. The form fields are only
 * filled when the document has an AcroForm. The destination directory is created if missing.
 *
 * @param template the template to use, may be {@code null} to use the bundled one
 * @param destinationDir the directory receiving the generated file
 * @param certificateFilename the name of the generated file
 * @return the generated file, or {@code null} if anything failed (the error is logged)
 */
public File fill(CertificateTemplate template, File destinationDir, String certificateFilename) {
    PDDocument document = null;
    InputStream templateStream = null;
    try {
        File templateFile = null;
        if (template != null) {
            templateFile = certificatesManager.getTemplateFile(template);
        }

        if (templateFile != null && templateFile.exists()) {
            templateStream = new FileInputStream(templateFile);
        } else {
            templateStream = CertificatesManager.class.getResourceAsStream("template.pdf");
        }

        document = PDDocument.load(templateStream);

        PDDocumentCatalog docCatalog = document.getDocumentCatalog();
        PDAcroForm acroForm = docCatalog.getAcroForm();
        if (acroForm != null) {
            fillUserProperties(acroForm);
            fillRepositoryEntry(acroForm);
            fillCertificationInfos(acroForm);
            fillAssessmentInfos(acroForm);
        }
        if (!destinationDir.exists()) {
            destinationDir.mkdirs();
        }

        File certificateFile = new File(destinationDir, certificateFilename);
        OutputStream out = new FileOutputStream(certificateFile);
        try {
            document.save(out);
            out.flush();
        } finally {
            // Close the file even when saving fails; it used to leak in that case.
            out.close();
        }
        return certificateFile;
    } catch (Exception e) {
        log.error("", e);
        return null;
    } finally {
        IOUtils.closeQuietly(document);
        IOUtils.closeQuietly(templateStream);
    }
}

From source file:org.olat.course.certificate.ui.UploadCertificateController.java

License:Apache License

/**
 * Checks that an uploaded PDF template can be used: it must load, must not be encrypted, and
 * writing a test value into every form field followed by a save to a discarding stream must
 * succeed. On failure the matching error key is set on {@code fileEl}.
 *
 * @param template the uploaded file to validate
 * @return {@code true} if no problem was detected
 */
private boolean validatePdf(File template) {
    boolean valid = true;

    PDDocument pdf = null;
    try (InputStream in = Files.newInputStream(template.toPath())) {
        pdf = PDDocument.load(in);
        if (pdf.isEncrypted()) {
            fileEl.setErrorKey("upload.error.encrypted", null);
            valid = false;
        } else {
            // check if we can write the form
            PDAcroForm form = pdf.getDocumentCatalog().getAcroForm();
            if (form != null) {
                @SuppressWarnings("unchecked")
                List<PDField> formFields = form.getFields();
                for (PDField formField : formFields) {
                    formField.setValue("test");
                }
            }
            pdf.save(new DevNullOutputStream());
        }
    } catch (IOException ex) {
        logError("", ex);
        String message = ex.getMessage();
        boolean nonSimpleFonts = message != null
                && message.contains("Don't know how to calculate the position for non-simple fonts");
        if (nonSimpleFonts) {
            fileEl.setErrorKey("upload.error.simplefonts", null);
        } else {
            fileEl.setErrorKey("upload.unkown.error", null);
        }
        valid = false;
    } catch (Exception ex) {
        logError("", ex);
        fileEl.setErrorKey("upload.unkown.error", null);
        valid = false;
    } finally {
        IOUtils.closeQuietly(pdf);
    }

    return valid;
}

From source file:org.olat.search.service.document.file.pdf.PdfBoxExtractor.java

License:Apache License

/**
 * Extracts the title and text content of the PDF stored in {@code leaf}.
 * <p>
 * An encrypted document is tried with an empty password; if that fails, the returned content
 * holds only the leaf name, used both as title and as text. Both the document and the input
 * stream are closed, each independently of the other.
 *
 * @param leaf the PDF to read
 * @return the extracted title and text
 * @throws IOException if loading, extraction or closing fails
 * @throws DocumentAccessException declared by the signature; not thrown directly in this method
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }
    BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
    try {
        PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (Exception e) {
                    log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                    LimitedContentWriter writer = new LimitedContentWriter(128,
                            FileDocumentFactory.getMaxFileSize());
                    writer.append(leaf.getName());
                    writer.close();
                    return new FileContent(leaf.getName(), writer.toString());
                }
            }
            String title = getTitle(document);
            if (log.isDebug()) {
                log.debug("readContent PDDocument loaded");
            }
            PDFTextStripper stripper = new PDFTextStripper();
            LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
            stripper.writeText(document, writer);
            writer.close();
            return new FileContent(title, writer.toString());
        } finally {
            document.close();
        }
    } finally {
        // Runs even if closing the document throws; the stream used to leak in that case.
        bis.close();
    }
}