Example usage for org.apache.pdfbox.pdmodel PDDocument isEncrypted

List of usage examples for org.apache.pdfbox.pdmodel PDDocument isEncrypted

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument isEncrypted.

Prototype

public boolean isEncrypted() 

Source Link

Document

This will tell if this document is encrypted or not.

Usage

From source file:org.knoesis.matvocab.indexer.LucenePDFDocument.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 *
 * @param document The document to add the contents to.
 * @param is The stream to get the contents from.
 * @param documentLocation The location of the document, used just for debug messages.
 * @param stripper The text stripper to use. When {@code null} a new
 *        {@link PDFTextStripper} is created; otherwise the given one is reset
 *        before it is used.
 *
 * @throws IOException If there is an error parsing the document, or if the
 *         document is encrypted and cannot be opened with the empty password.
 */
private void addContent(Document document, InputStream is, String documentLocation, PDFTextStripper stripper)
        throws IOException {
    PDDocument pdfDocument = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        if (stripper == null) {
            stripper = new PDFTextStripper();
        } else {
            stripper.resetEngine();
        }
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        // The same extracted text is added under both field names.
        addField(document, "contents", contents);

        addField(document, "stemmedcontents", contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addField(document, "Author", info.getAuthor());
            try {
                addField(document, "CreationDate", info.getCreationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Creator", info.getCreator());
            addField(document, "Keywords", info.getKeywords());
            try {
                addField(document, "ModificationDate", info.getModificationDate());
            } catch (IOException ignored) {
                //ignore, bad date but continue with indexing
            }
            addField(document, "Producer", info.getProducer());
            addField(document, "Subject", info.getSubject());
            addField(document, "Title", info.getTitle());
            addField(document, "Trapped", info.getTrapped());
        }
        // The summary is at most the first 500 characters of the extracted text.
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        addField(document, "summary", summary);
        addField(document, "numpages", String.valueOf(pdfDocument.getNumberOfPages()));
    } catch (CryptographyException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.lockss.pdf.pdfbox.PdfBoxDocumentFactory.java

License:Open Source License

/**
 * <p>/*from w  ww. j a  v  a  2s. c o  m*/
 * Override this method to alter the processing of the {@link PDDocument}
 * instance after it has been parsed by {@link PDFParser#parse()}.
 * </p>
 * 
 * @param pdDocument
 *          A freshly parsed {@link PDDocument} instance
 * @throws CryptographyException
 *           if a cryptography exception is thrown
 * @throws IOException
 *           if an I/O exception is thrown
 * @since 1.67
 */
protected void processAfterParse(PDDocument pdDocument) throws CryptographyException, IOException {
    pdDocument.setAllSecurityToBeRemoved(true);
    if (pdDocument.isEncrypted()) {
        pdDocument.decrypt("");
    }
}

From source file:org.mitre.xtext.converters.PDFConverter.java

License:Apache License

/**
 * Converts a PDF file into a {@link ConvertedDocument}.
 * Implementation is informed by PDFBox authors.
 *
 * <p>An encrypted PDF is not decrypted: the result only carries the property
 * {@code encrypted=YES}. For any other PDF the text is extracted with the
 * shared stripper and the available document information is copied over.
 *
 * @param doc the PDF file to convert
 * @return the converted document
 * @throws IOException If there is an error parsing the document.
 */
@Override
public synchronized ConvertedDocument convert(java.io.File doc) throws IOException {

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *      http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /*
     * Adapted from LucenePDFDocument.java from PDFBox lucene project
     * (author: Ben Litchfield, ben@benlitchfield.com; $Revision: 1.23 $).
     *
     * That class is used to create a document for the lucene search engine
     * and plugs into the IndexHTML or IndexFiles that comes with the lucene
     * project. It populates the following fields:
     *
     *   path             - File system path if loaded from a file
     *   url              - URL to PDF document
     *   contents         - Entire contents of PDF document, indexed but not stored
     *   summary          - First 500 characters of content
     *   modified         - The modified date/time according to the url or path
     *   uid              - A unique identifier for the Lucene document.
     *   CreationDate     - From PDF meta-data if available
     *   Creator          - From PDF meta-data if available
     *   Keywords         - From PDF meta-data if available
     *   ModificationDate - From PDF meta-data if available
     *   Producer         - From PDF meta-data if available
     *   Subject          - From PDF meta-data if available
     *   Trapped          - From PDF meta-data if available
     *   Encrypted        - From PDF meta-data if available
     */
    PDDocument pdf = null;
    final ConvertedDocument result = new ConvertedDocument(doc);

    try {
        pdf = PDDocument.load(doc);

        if (pdf.isEncrypted()) {
            // Decryption with the default password is deliberately not attempted.
            // An earlier attempt to call decrypt("") failed with
            // NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider
            // (raised from PDDocument.openProtection), i.e. it needs the
            // BouncyCastle crypto JARs. The document is only flagged instead.
            result.addProperty("encrypted", "YES");
            return result;
        }

        // Extract the text into an in-memory buffer.
        StringWriter extracted = new StringWriter();
        stripper.resetEngine();
        stripper.writeText(pdf, extracted);

        PDDocumentInformation meta = pdf.getDocumentInformation();
        if (meta != null) {
            result.addAuthor(meta.getAuthor());
            try {
                result.addCreateDate(meta.getCreationDate());
            } catch (IOException badDate) {
                // a bad date must not stop the conversion
            }
            result.addProperty("creator_tool", meta.getCreator());
            result.addProperty("keywords", meta.getKeywords());
            // ModificationDate, Producer and Trapped are intentionally not copied.
            result.addProperty("subject", meta.getSubject());

            // Fall back to the file name when the PDF has no usable title.
            String title = meta.getTitle();
            boolean placeholderTitle = title == null || "untitled".equalsIgnoreCase(title);
            result.addTitle(placeholderTitle ? result.filename : title);

            // TODO: Character set is what?
            result.setEncoding("UTF-8");
        }

        result.setPayload(extracted.getBuffer().toString());
        return result;

    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.nuxeo.pdf.test.PDFEncryptionTest.java

License:Open Source License

/**
 * Asserts that the given blob is an encrypted PDF which, opened with the
 * user password, allows extraction and printing but not assembling, form
 * filling or annotation changes, and which, opened with the owner password,
 * grants owner permission.
 *
 * @param inBlob   the PDF blob to check, must not be null
 * @param ownerPwd the owner password
 * @param userPwd  the user password
 * @throws Exception if loading or opening the document fails
 */
protected void checkIsReadOnly(Blob inBlob, String ownerPwd, String userPwd) throws Exception {

    assertNotNull(inBlob);

    // --- Open with the user password ---
    PDDocument userView = utils.loadAndTrack(inBlob);
    assertTrue(userView.isEncrypted());

    userView.openProtection(new StandardDecryptionMaterial(userPwd));
    assertFalse(userView.isEncrypted());

    AccessPermission userRights = userView.getCurrentAccessPermission();
    // allowed for the user
    assertTrue(userRights.canExtractContent());
    assertTrue(userRights.canExtractForAccessibility());
    assertTrue(userRights.canPrint());
    assertTrue(userRights.canPrintDegraded());
    // denied for the user
    assertFalse(userRights.canAssembleDocument());
    assertFalse(userRights.canFillInForm());
    assertFalse(userRights.canModifyAnnotations());

    utils.closeAndUntrack(userView);

    // --- Open a fresh copy with the owner password ---
    PDDocument ownerView = utils.loadAndTrack(inBlob);
    ownerView.openProtection(new StandardDecryptionMaterial(ownerPwd));
    assertFalse(ownerView.isEncrypted());

    AccessPermission ownerRights = ownerView.getCurrentAccessPermission();
    assertTrue(ownerRights.isOwnerPermission());

    utils.closeAndUntrack(ownerView);

}

From source file:org.nuxeo.pdf.test.PDFEncryptionTest.java

License:Open Source License

/**
 * Checks that {@code PDFEncryption.removeEncryption()} yields a
 * non-encrypted PDF, both for an encrypted input and for an input that was
 * never encrypted.
 */
@Test
public void testRemoveEncryption() throws Exception {

    // ---------- Case 1: encrypted PDF ----------
    FileBlob encryptedBlob = new FileBlob(FileUtils.getResourceFileFromContext(ENCRYPTED_PDF));

    // Sanity check: the input really is encrypted
    PDDocument before = utils.loadAndTrack(encryptedBlob);
    assertTrue(before.isEncrypted());
    utils.closeAndUntrack(before);

    PDFEncryption encryptedCase = new PDFEncryption(encryptedBlob);
    encryptedCase.setOriginalOwnerPwd(ENCRYPTED_PDF_PWD);
    Blob decrypted = encryptedCase.removeEncryption();

    assertNotNull(decrypted);

    PDDocument after = utils.loadAndTrack(decrypted);
    assertFalse(after.isEncrypted());
    utils.closeAndUntrack(after);

    // ---------- Case 2: non-encrypted PDF ----------
    // Removing encryption from a plain PDF should not trigger an error.
    PDFEncryption plainCase = new PDFEncryption(pdfFileBlob);
    plainCase.setOriginalOwnerPwd(ENCRYPTED_PDF_PWD);
    Blob stillPlain = plainCase.removeEncryption();

    assertNotNull(stillPlain);

    PDDocument plainDoc = utils.loadAndTrack(stillPlain);
    assertFalse(plainDoc.isEncrypted());
    utils.closeAndUntrack(plainDoc);

}

From source file:org.olat.core.commons.services.image.spi.ImageHelperImpl.java

License:Apache License

/**
 * Renders the first page of a PDF at 72 dpi and scales it into the given
 * thumbnail file.
 *
 * @param pdfFile       the PDF to render
 * @param thumbnailFile the target of the scaled image
 * @param maxWidth      maximum thumbnail width, passed to {@code scaleImage}
 * @param maxHeight     maximum thumbnail height, passed to {@code scaleImage}
 * @return the size returned by {@code scaleImage}, or {@code null} when the
 *         PDF cannot be decrypted with the empty password or any other
 *         error occurs
 */
@Override
public Size thumbnailPDF(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight) {
    InputStream pdfStream = null;
    PDDocument pdf = null;
    try {
        WorkThreadInformations.setInfoFiles(null, pdfFile);
        WorkThreadInformations.set("Generate thumbnail VFSLeaf=" + pdfFile);

        pdfStream = pdfFile.getInputStream();
        pdf = PDDocument.load(pdfStream);

        if (pdf.isEncrypted()) {
            // Try the empty password; anything else means we cannot render it.
            try {
                pdf.decrypt("");
            } catch (Exception e) {
                log.info("PDF document is encrypted: " + pdfFile);
                throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile);
            }
        }

        PDPage firstPage = (PDPage) pdf.getDocumentCatalog().getAllPages().get(0);
        BufferedImage rendered = firstPage.convertToImage(BufferedImage.TYPE_INT_BGR, 72);
        // scaleImage may itself yield null, which is passed through unchanged.
        return scaleImage(rendered, thumbnailFile, maxWidth, maxHeight);
    } catch (CannotGenerateThumbnailException e) {
        // already logged where it was raised
        return null;
    } catch (Exception e) {
        log.warn("Unable to create image from pdf file.", e);
        return null;
    } finally {
        WorkThreadInformations.unset();
        FileUtils.closeSafely(pdfStream);
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                //only a try, fail silently
            }
        }
    }
}

From source file:org.olat.core.commons.services.thumbnail.impl.PDFToThumbnail.java

License:Apache License

/**
 * Renders the first page of a PDF at 72 dpi, scales it into the given
 * thumbnail file and reports the resulting dimensions.
 *
 * @param pdfFile       the PDF to render
 * @param thumbnailFile the target of the scaled image
 * @param maxWidth      maximum thumbnail width, passed to {@code ImageHelper.scaleImage}
 * @param maxHeight     maximum thumbnail height, passed to {@code ImageHelper.scaleImage}
 * @return the width and height of the generated thumbnail
 * @throws CannotGenerateThumbnailException if the PDF cannot be decrypted
 *         with the empty password, has no pages, cannot be scaled, or any
 *         other error occurs while rendering
 */
@Override
public FinalSize generateThumbnail(VFSLeaf pdfFile, VFSLeaf thumbnailFile, int maxWidth, int maxHeight)
        throws CannotGenerateThumbnailException {
    InputStream in = null;
    PDDocument document = null;
    try {
        in = pdfFile.getInputStream();
        document = PDDocument.load(in);
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (Exception e) {
                log.info("PDF document is encrypted: " + pdfFile);
                throw new CannotGenerateThumbnailException("PDF document is encrypted: " + pdfFile);
            }
        }
        List<?> pages = document.getDocumentCatalog().getAllPages();
        if (pages.isEmpty()) {
            // Report a clear reason instead of an IndexOutOfBoundsException.
            throw new CannotGenerateThumbnailException("PDF document has no pages: " + pdfFile);
        }
        PDPage page = (PDPage) pages.get(0);
        BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_BGR, 72);
        Size size = ImageHelper.scaleImage(image, thumbnailFile, maxWidth, maxHeight);
        if (size == null) {
            // Report a clear reason instead of a NullPointerException.
            throw new CannotGenerateThumbnailException("Unable to scale image from pdf file: " + pdfFile);
        }
        // Fix: the height was previously filled with the width.
        return new FinalSize(size.getWidth(), size.getHeight());

    } catch (CannotGenerateThumbnailException e) {
        throw e;
    } catch (Exception e) {
        log.warn("Unable to create image from pdf file.", e);
        throw new CannotGenerateThumbnailException(e);
    } finally {
        FileUtils.closeSafely(in);
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                // only a try, fail silently
            }
        }
    }
}

From source file:org.olat.course.certificate.ui.UploadCertificateController.java

License:Apache License

/**
 * Checks whether the uploaded PDF template is usable: it must not be
 * encrypted, and every AcroForm field must accept a value and the document
 * must be savable. On failure an error key is set on {@code fileEl}.
 *
 * @param template the uploaded PDF file
 * @return {@code true} if no problem was found, {@code false} otherwise
 */
private boolean validatePdf(File template) {
    boolean valid = true;

    PDDocument pdf = null;
    try (InputStream in = Files.newInputStream(template.toPath())) {
        pdf = PDDocument.load(in);
        if (pdf.isEncrypted()) {
            fileEl.setErrorKey("upload.error.encrypted", null);
            valid = false;
        } else {
            // Dry run: fill every form field and save to a discarding stream
            // to find out whether the form can be written.
            PDAcroForm form = pdf.getDocumentCatalog().getAcroForm();
            if (form != null) {
                @SuppressWarnings("unchecked")
                List<PDField> formFields = form.getFields();
                for (PDField formField : formFields) {
                    formField.setValue("test");
                }
            }
            pdf.save(new DevNullOutputStream());
        }
    } catch (IOException ex) {
        logError("", ex);
        // This specific message gets a dedicated error key.
        String message = ex.getMessage();
        boolean nonSimpleFonts = message != null
                && message.contains("Don't know how to calculate the position for non-simple fonts");
        fileEl.setErrorKey(nonSimpleFonts ? "upload.error.simplefonts" : "upload.unkown.error", null);
        valid = false;
    } catch (Exception ex) {
        logError("", ex);
        fileEl.setErrorKey("upload.unkown.error", null);
        valid = false;
    } finally {
        IOUtils.closeQuietly(pdf);
    }

    return valid;
}

From source file:org.olat.search.service.document.file.pdf.PdfBoxExtractor.java

License:Apache License

/**
 * Extracts the title and text of a PDF leaf.
 *
 * <p>If the PDF is encrypted and cannot be decrypted with the empty
 * password, a warning is logged and the returned content uses the leaf name
 * as both title and text.
 *
 * @param leaf the PDF file to read
 * @return the extracted title and text
 * @throws IOException if the PDF cannot be loaded, read or closed
 * @throws DocumentAccessException declared by the signature; not thrown
 *         directly by the code in this method
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }
    PDDocument document = null;
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        document = PDDocument.load(bis);
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (Exception e) {
                log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                LimitedContentWriter writer = new LimitedContentWriter(128,
                        FileDocumentFactory.getMaxFileSize());
                writer.append(leaf.getName());
                writer.close();
                return new FileContent(leaf.getName(), writer.toString());
            }
        }
        String title = getTitle(document);
        if (log.isDebug()) {
            log.debug("readContent PDDocument loaded");
        }
        PDFTextStripper stripper = new PDFTextStripper();
        LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
        stripper.writeText(document, writer);
        writer.close();
        return new FileContent(title, writer.toString());
    } finally {
        // Nested so the stream is closed even when closing the document throws.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            if (bis != null) {
                bis.close();
            }
        }
    }
}

From source file:org.olat.search.service.document.file.PdfDocument.java

License:Apache License

/**
 * Extracts the text of a PDF leaf.
 *
 * @param leaf the PDF file to read
 * @return the text returned by {@link PDFTextStripper#getText(PDDocument)}
 * @throws IOException if the PDF cannot be loaded, read or closed
 * @throws DocumentAccessException if the PDF is encrypted and cannot be
 *         decrypted with the empty password
 */
private String extractTextFromPdf(final VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }
    PDDocument document = null;
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        document = PDDocument.load(bis);
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (final Exception e) {
                throw new DocumentAccessException(
                        "PDF is encrypted. Can not read content file=" + leaf.getName());
            }
        }
        if (log.isDebug()) {
            log.debug("readContent PDDocument loaded");
        }
        final PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(document);
    } finally {
        // Nested so the stream is closed even when closing the document throws.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            if (bis != null) {
                bis.close();
            }
        }
    }

}