Example usage for org.apache.pdfbox.pdmodel.PDDocument#isEncrypted()

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#isEncrypted()

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument#isEncrypted().

Prototype

public boolean isEncrypted() 

Source Link

Document

This will tell if this document is encrypted or not.

Usage

From source file:org.apache.fop.render.pdf.pdfbox.AbstractPDFBoxHandler.java

License:Apache License

/**
 * Builds the content stream for one page of an embedded PDF and registers it on the target page.
 *
 * <p>The page to embed is selected by the page index encoded in the image's original URI. If the
 * embedded document declares a higher PDF version than the target document, the target version is
 * raised; when that is no longer possible a {@code pdfVersionMismatch} event is emitted instead.
 * Events are also emitted when a PDF/A or PDF/X profile is active on the target document.
 *
 * @param image the PDF image to embed
 * @param targetPage the page that receives the embedded content
 * @param userAgent the user agent; may be {@code null}, in which case no event broadcaster is
 *        taken from it and the accessibility (tagged PDF) step is skipped
 * @param at the transform passed through to the PDFBox adapter
 * @param fontinfo the font information passed through to the PDFBox adapter
 * @param pos the placement rectangle passed through to the PDFBox adapter
 * @param pageNumbers page number map handed to the PDFBox adapter
 * @param handler the logical structure handler; may be {@code null}
 * @param curentSessionElem the structure element used when accessibility is enabled
 * @return the generated stream, or {@code null} if the embedded document is encrypted
 * @throws IOException if reading or converting the embedded page fails
 */
protected String createStreamForPDF(ImagePDF image, PDFPage targetPage, FOUserAgent userAgent,
        AffineTransform at, FontInfo fontinfo, Rectangle pos, Map<Integer, PDFArray> pageNumbers,
        PDFLogicalStructureHandler handler, PDFStructElem curentSessionElem) throws IOException {

    EventBroadcaster eventBroadcaster = null;
    if (userAgent != null) {
        eventBroadcaster = userAgent.getEventBroadcaster();
    }
    String originalImageUri = image.getInfo().getOriginalURI();
    final int selectedPage = ImageUtil.needPageIndexFromURI(originalImageUri);

    PDDocument pddoc = image.getPDDocument();
    float pdfVersion = pddoc.getDocument().getVersion();
    Version inputDocVersion = Version.getValueOf(String.valueOf(pdfVersion));
    PDFDocument pdfDoc = targetPage.getDocument();

    // Raise the output version when the embedded document needs a newer one.
    if (pdfDoc.getPDFVersion().compareTo(inputDocVersion) < 0) {
        try {
            pdfDoc.setPDFVersion(inputDocVersion);
        } catch (IllegalStateException e) {
            getEventProducer(eventBroadcaster).pdfVersionMismatch(this, pdfDoc.getPDFVersionString(),
                    String.valueOf(pdfVersion));
        }
    }

    //Encryption test
    if (pddoc.isEncrypted()) {
        getEventProducer(eventBroadcaster).encryptedPdf(this);
        return null;
    }

    //Warn about potential problems with PDF/A and PDF/X
    if (pdfDoc.getProfile().isPDFAActive()) {
        getEventProducer(eventBroadcaster).pdfAActive(this);
    }
    if (pdfDoc.getProfile().isPDFXActive()) {
        getEventProducer(eventBroadcaster).pdfXActive(this);
    }

    Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);

    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);

    // Give the page its own resources dictionary chained to the document-level resources.
    if (targetPage.getPDFResources().getParentResources() == null) {
        PDFResources res = pdfDoc.getFactory().makeResources();
        res.setParentResources(pdfDoc.getResources());
        res.addContext(targetPage);
        targetPage.put("Resources", res);
    }

    PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
    if (handler != null) {
        adapter.setCurrentMCID(handler.getPageParentTree().length());
    }
    String stream = adapter.createStreamFromPDFBoxPage(pddoc, page, originalImageUri, at, fontinfo, pos);
    // userAgent is treated as optional at the top of this method, so it must be guarded here too.
    if (userAgent != null && userAgent.isAccessibilityEnabled()) {
        TaggedPDFConductor conductor = new TaggedPDFConductor(curentSessionElem, handler, page, adapter);
        conductor.handleLogicalStructure(pddoc);
    }
    return stream;
}

From source file:org.apache.pdflens.Main.java

License:Apache License

/**
* This will parse a document./*from   w  w  w.  ja v  a2s .  c  om*/
*
* @param input The input stream for the document.
*
* @return The document.
*
* @throws IOException If there is an error parsing the document.
*/
private static PDDocument parseDocument(InputStream input) throws IOException {
    PDDocument document = PDDocument.load(input);
    if (document.isEncrypted()) {
        try {
            document.decrypt("");
        } catch (InvalidPasswordException e) {
            System.err.println("Error: The document is encrypted.");
        } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
            e.printStackTrace();
        }
    }

    return document;
}

From source file:org.apache.syncope.client.console.wicket.markup.html.form.preview.BinaryPDFPreviewer.java

License:Apache License

/**
 * Builds the preview component for an uploaded PDF.
 *
 * <p>The first page is rendered into {@code firstPage} unless the document is encrypted or cannot
 * be read; the returned container then holds either the thumbnail fragment or the "no preview"
 * fragment.
 *
 * @param uploadedBytes raw bytes of the uploaded PDF
 * @return the result of adding (or replacing) the preview container on this component
 */
@Override
public Component preview(final byte[] uploadedBytes) {
    firstPage = null;

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(new ByteArrayInputStream(uploadedBytes));
        if (!pdf.isEncrypted()) {
            firstPage = new PDFRenderer(pdf).renderImage(0, RESOLUTION, IMAGE_TYPE);
        } else {
            LOG.info("Document is encrypted, no preview is possible");
        }
    } catch (IOException e) {
        LOG.error("While generating thumbnail from first page", e);
    } finally {
        IOUtils.closeQuietly(pdf);
    }

    // Pick the markup fragment depending on whether a thumbnail could be rendered.
    final boolean rendered = firstPage != null;
    final Fragment fragment =
            new Fragment("preview", rendered ? "previewFragment" : "noPreviewFragment", this);
    if (rendered) {
        fragment.add(new NonCachingImage("previewImage", new ThumbnailImageResource(firstPage)));
    }

    final WebMarkupContainer container = new WebMarkupContainer("previewContainer");
    container.setOutputMarkupId(true);
    container.add(fragment);
    return this.addOrReplace(container);
}

From source file:org.apache.tika.parser.pdf.EnhancedPDFParser.java

License:Apache License

/**
 * Parses a PDF stream, recording metadata and, when a handler is given, emitting its content.
 *
 * <p>The {@code pdf:encrypted} metadata entry records whether the loaded document is encrypted.
 * When the document carries XFA text, that text is parsed with {@code TXTParser}; otherwise the
 * document is processed by {@code EnhancedPDF2XHTML}.
 *
 * @param stream the PDF input; it is wrapped so that loading does not close it
 * @param handler receives the extracted content; may be {@code null} to extract metadata only
 * @param metadata receives the extracted metadata
 * @param context parse context; may supply a {@code PDFParserConfig} overriding the default
 * @throws IOException if the document cannot be read
 * @throws SAXException if the handler fails
 * @throws TikaException if parsing fails
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    PDDocument pdfDocument = null;
    //config from context, or default if not set via context
    PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
    try {
        String password = getPassword(metadata, context);
        // The load call does not depend on whether the stream is file-backed, so only the
        // parser choice is distinguished here.
        if (localConfig.getUseNonSequentialParser()) {
            pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), password);
        } else {
            pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
        }
        metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));

        pdfDocument.setAllSecurityToBeRemoved(true);

        metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
        extractMetadata(pdfDocument, metadata);
        if (handler != null) {
            String xfaXml = extractXFAText(pdfDocument);
            if (xfaXml != null) {
                // Encode explicitly: the platform default charset would make the output depend
                // on the machine the parser runs on.
                try (BufferedInputStream is = new BufferedInputStream(
                        new ByteArrayInputStream(xfaXml.getBytes(java.nio.charset.StandardCharsets.UTF_8)))) {
                    new TXTParser().parse(is, handler, metadata, context);
                }
                // Set the content type again after the text parser has run on the same metadata.
                metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
            } else {
                EnhancedPDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
            }
        }
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.apache.tika.parser.pdf.PDFParser.java

License:Apache License

/**
 * Parses a PDF stream with either the non-sequential or the classic PDFBox parser.
 *
 * <p>A file-backed {@code TikaInputStream} is parsed with a temporary scratch file; any other
 * stream is parsed in memory. Bad-password failures from either parser are reported as
 * {@code EncryptedDocumentException}.
 *
 * @param stream the PDF input; it is wrapped so that loading does not close it
 * @param handler receives the extracted content; may be {@code null} to extract metadata only
 * @param metadata receives the extracted metadata, including {@code pdf:encrypted}
 * @param context parse context; may supply a {@code PDFParserConfig} overriding the default
 * @throws IOException if the document cannot be read
 * @throws SAXException if the handler fails
 * @throws TikaException if parsing fails or the document cannot be decrypted
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    PDDocument pdfDocument = null;
    TemporaryResources tmp = new TemporaryResources();
    // Configuration supplied through the context wins over the parser's default.
    PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
    String password = "";
    try {
        // Whether the input is already backed by a file decides between scratch-file
        // and in-memory processing.
        TikaInputStream tstream = TikaInputStream.cast(stream);
        password = getPassword(metadata, context);
        final boolean fileBacked = tstream != null && tstream.hasFile();
        if (fileBacked) {
            RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
            pdfDocument = localConfig.getUseNonSequentialParser()
                    ? PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile, password)
                    : PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
        } else if (localConfig.getUseNonSequentialParser()) {
            pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream),
                    new RandomAccessBuffer(), password);
        } else {
            pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
        }
        metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));

        // The classic parser leaves encrypted documents encrypted; decrypt them by hand.
        if (!localConfig.getUseNonSequentialParser() && pdfDocument.isEncrypted()) {
            pdfDocument.decrypt(password);
        }

        metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
        extractMetadata(pdfDocument, metadata);

        AccessChecker checker = localConfig.getAccessChecker();
        checker.check(metadata);
        if (handler != null && shouldHandleXFAOnly(pdfDocument, localConfig)) {
            handleXFAOnly(pdfDocument, handler, metadata);
        } else if (handler != null) {
            PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
        }

    } catch (CryptographyException e) {
        // The sequential parser signals a bad password with CryptographyException.
        throw new EncryptedDocumentException(e);
    } catch (IOException e) {
        // The non-sequential parser signals a bad password with an IOException carrying a
        // recognisable message; anything else is passed on untouched.
        final String message = e.getMessage();
        if (message == null || !message.contains("Error (CryptographyException)")) {
            throw e;
        }
        metadata.set("pdf:encrypted", Boolean.toString(true));
        throw new EncryptedDocumentException(e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
        tmp.dispose();
        //TODO: once we migrate to PDFBox 2.0, remove this (PDFBOX-2200)
        PDFont.clearResources();
    }
}

From source file:org.apache.tika.parser.pdf.PDFPureJavaParser.java

License:Apache License

/**
 * Parses a PDF stream, recording metadata and, when a handler is given, emitting its content.
 *
 * <p>A file-backed {@code TikaInputStream} is handed to PDFBox as a file; any other stream is
 * loaded through a close-shielding wrapper. OCR itself is not performed here: for the OCR
 * strategies only the {@code X-Parsed-By} metadata entry is added.
 *
 * @param stream the PDF input
 * @param handler receives the extracted content; may be {@code null} to extract metadata only
 * @param metadata receives the extracted metadata
 * @param context parse context; may supply a {@code PDFPureJavaParserConfig} overriding the default
 * @throws IOException if the document cannot be read
 * @throws SAXException if the handler fails
 * @throws TikaException if parsing fails, the password is invalid, or PDF processing times out
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    final PDFPureJavaParserConfig localConfig = context.get(PDFPureJavaParserConfig.class, defaultConfig);

    PDDocument pdfDocument = null;
    try {
        //TODO: make this configurable via MemoryUsageSetting
        final TikaInputStream tstream = TikaInputStream.cast(stream);
        final String password = getPassword(metadata, context);
        final boolean fileBacked = tstream != null && tstream.hasFile();
        // File-backed input goes to PDFBox as a file; everything else is read from the stream.
        pdfDocument = fileBacked
                ? PDDocument.load(tstream.getPath().toFile(), password)
                : PDDocument.load(new CloseShieldInputStream(stream), password);
        metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(pdfDocument.isEncrypted()));

        metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString());
        extractMetadata(pdfDocument, metadata, context);
        localConfig.getAccessChecker().check(metadata);
        if (handler != null) {
            if (shouldHandleXFAOnly(pdfDocument, localConfig)) {
                handleXFAOnly(pdfDocument, handler, metadata, context);
            } else {
                final boolean ocrOnly = localConfig.getOcrStrategy()
                        .equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_ONLY);
                if (ocrOnly || localConfig.getOcrStrategy()
                        .equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
                    metadata.add("X-Parsed-By", "org.apache.tika.parser.ocr.TesseractOCRParser");
                }
                // OCR-only is a no-op beyond the metadata entry: the OCR parser is not supported.
                if (!ocrOnly) {
                    PDF2XHTMLPureJava.process(pdfDocument, handler, context, metadata, localConfig);
                }
            }
        }
    } catch (InvalidPasswordException e) {
        metadata.set(PDF.IS_ENCRYPTED, "true");
        throw new EncryptedDocumentException(e);
    } catch (final PdfTimeoutException e) {
        throw new TikaPdfTimeoutException("PdfTimeoutException", e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}

From source file:org.codelibs.robot.extractor.impl.PdfExtractor.java

License:Apache License

/**
 * Extracts the text and metadata of a PDF read from the given stream.
 *
 * <p>If the document is encrypted and {@code params} is given, a password is taken from
 * {@code params} (or looked up by URL / resource name) and used to open the document. Text
 * stripping runs on a daemon thread that is interrupted if it does not finish within
 * {@code timeout}.
 *
 * @param in the PDF input stream; must not be {@code null}
 * @param params extraction parameters; may be {@code null}
 * @return the extracted text together with the document metadata
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException if extraction fails, is interrupted, or does not finish in time
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    synchronized (pdfBoxLockObj) {
        PDDocument document = null;
        try {
            document = PDDocument.load(in, null, force);
            if (document.isEncrypted() && params != null) {
                String password = params.get(ExtractData.PDF_PASSWORD);
                if (password == null) {
                    password = getPassword(params.get(ExtractData.URL),
                            params.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
                }
                if (password != null) {
                    final StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password);
                    document.openProtection(sdm);
                    final AccessPermission ap = document.getCurrentAccessPermission();

                    if (!ap.canExtractContent()) {
                        throw new IOException("You do not have permission to extract text.");
                    }
                }
            }

            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final Writer output = new OutputStreamWriter(baos, encoding);
            final PDFTextStripper stripper = new PDFTextStripper(encoding);
            stripper.setForceParsing(force);
            final AtomicBoolean done = new AtomicBoolean(false);
            final PDDocument doc = document;
            final Set<Exception> exceptionSet = new HashSet<>();
            // Strip text on a separate thread so that a runaway document can be abandoned.
            final Thread task = new Thread(() -> {
                try {
                    stripper.writeText(doc, output);
                } catch (final Exception e) {
                    exceptionSet.add(e);
                } finally {
                    done.set(true);
                }
            });
            task.setDaemon(true);
            task.start();
            task.join(timeout);
            if (!done.get()) {
                // Keep interrupting for a bounded time before giving up on the worker.
                for (int i = 0; i < 100 && !done.get(); i++) {
                    task.interrupt();
                    Thread.sleep(50);
                }
                throw new ExtractException("PDFBox process cannot finish in " + timeout + " sec.");
            } else if (!exceptionSet.isEmpty()) {
                throw exceptionSet.iterator().next();
            }
            output.flush();
            final ExtractData extractData = new ExtractData(baos.toString(encoding));
            extractMetadata(document, extractData);
            return extractData;
        } catch (final ExtractException e) {
            // Already the type callers expect (e.g. the timeout above); do not wrap it again.
            throw e;
        } catch (final InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new ExtractException(e);
        } catch (final Exception e) {
            throw new ExtractException(e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (final IOException e) {
                    // NOP
                }
            }
        }
    }
}

From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxSource.java

License:Apache License

/**
 * Opens a PDF file, decrypting it with the given password when it is encrypted.
 *
 * <p>The file name is stored in the logging MDC under {@code "doc"} for the duration of the
 * load and stays set on success; it is reset when opening fails. If the file is encrypted and
 * no password is given, a warning is logged and the document is returned as loaded.
 *
 * @param pdfFile the file to open
 * @param password the password to decrypt with, or {@code null} if none is available
 * @return the loaded document; the caller is responsible for closing it
 * @throws RuntimeException if the file cannot be read or decrypted
 */
@NotNull
protected static PDDocument openPdfDocument(@NotNull final File pdfFile, @Nullable final String password) {

    long t0 = System.currentTimeMillis();

    MDC.put("doc", pdfFile.getName());
    log.info("LOG00120:Opening PDF file " + pdfFile + ".");

    try {
        final PDDocument document = PDDocument.load(pdfFile);

        if (document.isEncrypted()) {
            if (password != null) {
                try {
                    document.decrypt(password);
                } catch (Exception e) {
                    // The document will not reach the caller, so release it here and reset
                    // the MDC the same way the load-failure path below does.
                    try {
                        document.close();
                    } catch (IOException ignored) {
                        // The decryption failure is the one worth reporting.
                    }
                    MDC.put("doc", "");
                    throw new RuntimeException("Error while reading encrypted PDF:", e);
                }
            } else {
                log.warn("File claims to be encrypted, a password should be provided");
            }
        }

        log.debug("load()took" + (System.currentTimeMillis() - t0) + "ms");

        return document;
    } catch (IOException e) {
        MDC.put("doc", "");

        throw new RuntimeException("Error while reading " + pdfFile + ".", e);
    }
}

From source file:org.exoplatform.services.document.impl.PDFDocumentReader.java

License:Open Source License

/**
 * Reads Dublin Core style properties from a PDF stream.
 *
 * <p>Properties are taken from the document's XMP metadata (Dublin Core, PDF and Basic
 * schemas). If that yields nothing, the document information dictionary is used instead.
 * A failure to read an individual field is logged and does not abort the extraction. The
 * stream is always closed before this method returns, including when loading fails.
 *
 * @param is the PDF input stream; must not be {@code null}
 * @return the extracted properties, possibly empty
 * @throws IOException if the document cannot be read
 * @throws DocumentReadException if the document is encrypted and cannot be decrypted with the
 *         empty password
 * @throws IllegalArgumentException if {@code is} is {@code null}
 */
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException {
    try {
        return SecurityHelper.doPrivilegedExceptionAction(new PrivilegedExceptionAction<Properties>() {
            public Properties run() throws Exception {
                if (is == null) {
                    throw new IllegalArgumentException("InputStream is null.");
                }

                PDDocument pdDocument = null;
                Properties props = new Properties();
                try {
                    // Load inside the try block so that the stream is closed even if loading fails.
                    pdDocument = PDDocument.load(is);
                    if (pdDocument.isEncrypted()) {
                        try {
                            pdDocument.decrypt("");
                        } catch (InvalidPasswordException e) {
                            throw new DocumentReadException("The pdf document is encrypted.", e);
                        } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
                            throw new DocumentReadException(e.getMessage(), e);
                        }
                    }

                    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
                    PDMetadata meta = catalog.getMetadata();
                    if (meta != null) {
                        XMPMetadata metadata = meta.exportXMPMetadata();

                        XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
                        if (dc != null) {
                            try {
                                if (dc.getTitle() != null)
                                    props.put(DCMetaData.TITLE, fixEncoding(dc.getTitle()));
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }
                            try {
                                if (dc.getDescription() != null)
                                    props.put(DCMetaData.DESCRIPTION, fixEncoding(dc.getDescription()));
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }

                            try {
                                if (dc.getCreators() != null) {
                                    // Each put replaces the previous one, so the last creator wins.
                                    for (String creator : dc.getCreators()) {
                                        props.put(DCMetaData.CREATOR, fixEncoding(creator));
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }

                            try {
                                if (dc.getDates() != null) {
                                    // Each put replaces the previous one, so the last date wins.
                                    for (Calendar date : dc.getDates()) {
                                        props.put(DCMetaData.DATE, date);
                                    }
                                }
                            } catch (Exception e) {
                                LOG.warn("getDate failed: " + e.getMessage());
                            }
                        }

                        XMPSchemaPDF pdf = metadata.getPDFSchema();
                        if (pdf != null) {
                            try {
                                if (pdf.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, fixEncoding(pdf.getKeywords()));
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }

                            try {
                                if (pdf.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, fixEncoding(pdf.getProducer()));
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                        }

                        XMPSchemaBasic basic = metadata.getBasicSchema();
                        if (basic != null) {
                            try {
                                if (basic.getCreateDate() != null)
                                    props.put(DCMetaData.DATE, basic.getCreateDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (basic.getModifyDate() != null)
                                    props.put(DCMetaData.DATE, basic.getModifyDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }

                            // DCMetaData.PUBLISHER - basic.getCreatorTool()
                        }
                    }

                    if (props.isEmpty()) {
                        // The pdf doesn't contain any metadata, try to use the document
                        // information instead
                        PDDocumentInformation docInfo = pdDocument.getDocumentInformation();

                        if (docInfo != null) {
                            try {
                                if (docInfo.getAuthor() != null)
                                    props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor());
                            } catch (Exception e) {
                                LOG.warn("getAuthor failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getCreationDate());
                            } catch (Exception e) {
                                LOG.warn("getCreationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getCreator() != null)
                                    props.put(DCMetaData.CREATOR, docInfo.getCreator());
                            } catch (Exception e) {
                                LOG.warn("getCreator failed: " + e.getMessage());
                            }
                            try {

                                if (docInfo.getKeywords() != null)
                                    props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
                            } catch (Exception e) {
                                LOG.warn("getKeywords failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getModificationDate() != null)
                                    props.put(DCMetaData.DATE, docInfo.getModificationDate());
                            } catch (Exception e) {
                                LOG.warn("getModificationDate failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getProducer() != null)
                                    props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                            } catch (Exception e) {
                                LOG.warn("getProducer failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getSubject() != null)
                                    props.put(DCMetaData.DESCRIPTION, docInfo.getSubject());
                            } catch (Exception e) {
                                LOG.warn("getSubject failed: " + e.getMessage());
                            }
                            try {
                                if (docInfo.getTitle() != null)
                                    props.put(DCMetaData.TITLE, docInfo.getTitle());
                            } catch (Exception e) {
                                LOG.warn("getTitle failed: " + e.getMessage());
                            }

                            // docInfo.getTrapped();
                        }
                    }
                } finally {
                    try {
                        if (pdDocument != null) {
                            pdDocument.close();
                        }
                    } finally {
                        // Nested finally: the stream must be closed even if closing the document fails.
                        try {
                            is.close();
                        } catch (IOException e) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("An exception occurred: " + e.getMessage());
                            }
                        }
                    }
                }
                return props;
            }
        });

    } catch (PrivilegedActionException pae) {
        // Unwrap so that callers see the exception types declared on this method.
        Throwable cause = pae.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        } else if (cause instanceof DocumentReadException) {
            throw (DocumentReadException) cause;
        } else if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        } else {
            throw new RuntimeException(cause);
        }
    }
}

From source file:org.github.jipsg.pdfbox.PDDocumentFactory.java

License:Apache License

/**
 * Create a PDFBox document.
 *
 * <p>Supported sources are {@code File}, {@code InputStream}, {@code DataSource},
 * {@code byte[]} and {@code String} (interpreted as a file path). An encrypted document is
 * decrypted with the empty password. Any input stream used for loading, including one passed
 * in by the caller, is closed before this method returns.
 *
 * @param source An opaque source
 * @return the document; the caller is responsible for closing it
 * @throws IOException the creation failed, including when {@code source} is {@code null} or of
 *         an unsupported type
 */
public PDDocument create(Object source) throws IOException {

    PDDocument result = null;
    InputStream is = null;
    String sourceName = "unknown";
    // Resolved up front so that building the error message can never fail on a null source.
    String sourceType = source == null ? "null" : source.getClass().getName();
    boolean created = false;

    try {
        if (source instanceof File) {
            File sourceFile = (File) source;
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile);
        } else if (source instanceof InputStream) {
            is = (InputStream) source;
            result = PDDocument.load(is, true);
        } else if (source instanceof DataSource) {
            is = ((DataSource) source).getInputStream();
            result = PDDocument.load(is, true);
        } else if (source instanceof byte[]) {
            is = new ByteArrayInputStream((byte[]) source);
            result = PDDocument.load(is, true);
        } else if (source instanceof String) {
            File sourceFile = new File((String) source);
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile.getAbsoluteFile());
        } else {
            throw new IllegalArgumentException("Don't know how to handle : " + sourceType);
        }

        if (result.isEncrypted()) {
            result.decrypt("");
        }

        created = true;
        return result;
    } catch (Exception e) {
        String msg = "Parsing the PDF document failed : name=" + sourceName + ", type="
                + sourceType;
        throw new IOException(msg, e);
    } finally {
        if (!created && result != null) {
            // The document was loaded but will not reach the caller; release it.
            try {
                result.close();
            } catch (IOException ignored) {
                // The original failure is the one worth reporting.
            }
        }
        if (is != null) {
            is.close();
        }
    }
}