List of usage examples for org.apache.pdfbox.pdmodel PDDocument isEncrypted
public boolean isEncrypted()
From source file:org.apache.fop.render.pdf.pdfbox.AbstractPDFBoxHandler.java
License:Apache License
/**
 * Converts one page of an embedded PDF image into a content stream for the target page.
 *
 * <p>The page index is taken from the image's original URI. If the input PDF has a higher
 * version than the target document, the target version is raised (a version-mismatch event
 * is emitted when that is rejected with an {@link IllegalStateException}). Encrypted input
 * is reported through the event producer and not processed.
 *
 * @param image the PDF image supplying the source {@code PDDocument} and its original URI
 * @param targetPage the page that receives the content and, if needed, its own resources
 * @param userAgent the user agent; may be {@code null}, in which case no event broadcaster
 *                  is obtained and the accessibility step is skipped
 * @param at transform handed to the adapter
 * @param fontinfo font information handed to the adapter
 * @param pos position rectangle handed to the adapter
 * @param pageNumbers page number map handed to the adapter
 * @param handler logical structure handler; when non-null its page parent tree length
 *                seeds the adapter's current MCID
 * @param curentSessionElem structure element handed to the tagged-PDF conductor
 * @return the stream created by the adapter, or {@code null} if the input PDF is encrypted
 * @throws IOException propagated from the PDFBox-based conversion calls
 */
protected String createStreamForPDF(ImagePDF image, PDFPage targetPage, FOUserAgent userAgent,
        AffineTransform at, FontInfo fontinfo, Rectangle pos, Map<Integer, PDFArray> pageNumbers,
        PDFLogicalStructureHandler handler, PDFStructElem curentSessionElem) throws IOException {

    EventBroadcaster eventBroadcaster = null;
    if (userAgent != null) {
        eventBroadcaster = userAgent.getEventBroadcaster();
    }

    String originalImageUri = image.getInfo().getOriginalURI();
    final int selectedPage = ImageUtil.needPageIndexFromURI(originalImageUri);

    PDDocument pddoc = image.getPDDocument();
    float pdfVersion = pddoc.getDocument().getVersion();
    Version inputDocVersion = Version.getValueOf(String.valueOf(pdfVersion));
    PDFDocument pdfDoc = targetPage.getDocument();

    // The output must be at least as new as the embedded PDF.
    if (pdfDoc.getPDFVersion().compareTo(inputDocVersion) < 0) {
        try {
            pdfDoc.setPDFVersion(inputDocVersion);
        } catch (IllegalStateException e) {
            getEventProducer(eventBroadcaster).pdfVersionMismatch(this,
                    pdfDoc.getPDFVersionString(), String.valueOf(pdfVersion));
        }
    }

    // Encryption test
    if (pddoc.isEncrypted()) {
        getEventProducer(eventBroadcaster).encryptedPdf(this);
        return null;
    }

    // Warn about potential problems with PDF/A and PDF/X
    if (pdfDoc.getProfile().isPDFAActive()) {
        getEventProducer(eventBroadcaster).pdfAActive(this);
    }
    if (pdfDoc.getProfile().isPDFXActive()) {
        getEventProducer(eventBroadcaster).pdfXActive(this);
    }

    Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);
    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);

    // Give the target page its own resources object, chained to the document resources.
    if (targetPage.getPDFResources().getParentResources() == null) {
        PDFResources res = pdfDoc.getFactory().makeResources();
        res.setParentResources(pdfDoc.getResources());
        res.addContext(targetPage);
        targetPage.put("Resources", res);
    }

    PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
    if (handler != null) {
        adapter.setCurrentMCID(handler.getPageParentTree().length());
    }

    String stream = adapter.createStreamFromPDFBoxPage(pddoc, page, originalImageUri, at, fontinfo, pos);

    // userAgent was treated as optional above, so guard the dereference here as well.
    if (userAgent != null && userAgent.isAccessibilityEnabled()) {
        TaggedPDFConductor conductor = new TaggedPDFConductor(curentSessionElem, handler, page, adapter);
        conductor.handleLogicalStructure(pddoc);
    }
    return stream;
}
From source file:org.apache.pdflens.Main.java
License:Apache License
/** * This will parse a document./*from w w w. ja v a2s . c om*/ * * @param input The input stream for the document. * * @return The document. * * @throws IOException If there is an error parsing the document. */ private static PDDocument parseDocument(InputStream input) throws IOException { PDDocument document = PDDocument.load(input); if (document.isEncrypted()) { try { document.decrypt(""); } catch (InvalidPasswordException e) { System.err.println("Error: The document is encrypted."); } catch (org.apache.pdfbox.exceptions.CryptographyException e) { e.printStackTrace(); } } return document; }
From source file:org.apache.syncope.client.console.wicket.markup.html.form.preview.BinaryPDFPreviewer.java
License:Apache License
@Override public Component preview(final byte[] uploadedBytes) { firstPage = null;// w ww.j a v a 2 s . c o m PDDocument document = null; try { document = PDDocument.load(new ByteArrayInputStream(uploadedBytes)); if (document.isEncrypted()) { LOG.info("Document is encrypted, no preview is possible"); } else { firstPage = new PDFRenderer(document).renderImage(0, RESOLUTION, IMAGE_TYPE); } } catch (IOException e) { LOG.error("While generating thumbnail from first page", e); } finally { IOUtils.closeQuietly(document); } Fragment fragment; if (firstPage == null) { fragment = new Fragment("preview", "noPreviewFragment", this); } else { fragment = new Fragment("preview", "previewFragment", this); fragment.add(new NonCachingImage("previewImage", new ThumbnailImageResource(firstPage))); } WebMarkupContainer previewContainer = new WebMarkupContainer("previewContainer"); previewContainer.setOutputMarkupId(true); previewContainer.add(fragment); return this.addOrReplace(previewContainer); }
From source file:org.apache.tika.parser.pdf.EnhancedPDFParser.java
License:Apache License
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { PDDocument pdfDocument = null; //config from context, or default if not set via context PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig); String password = ""; try {/*from ww w.j ava 2 s . c o m*/ // PDFBox can process entirely in memory, or can use a temp file // for unpacked / processed resources // Decide which to do based on if we're reading from a file or not already TikaInputStream tstream = TikaInputStream.cast(stream); password = getPassword(metadata, context); if (tstream != null && tstream.hasFile()) { // File based, take that as a cue to use a temporary file if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true); } } else { // Go for the normal, stream based in-memory parsing if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true); } } metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted())); pdfDocument.setAllSecurityToBeRemoved(true); metadata.set(Metadata.CONTENT_TYPE, "application/pdf"); extractMetadata(pdfDocument, metadata); if (handler != null) { String xfaXml = extractXFAText(pdfDocument); if (xfaXml != null) { try (BufferedInputStream is = new BufferedInputStream( new ByteArrayInputStream(xfaXml.getBytes()))) { new TXTParser().parse(is, handler, metadata, context); } metadata.set(Metadata.CONTENT_TYPE, "application/pdf"); } else { EnhancedPDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } } } finally { if (pdfDocument != null) { pdfDocument.close(); } } }
From source file:org.apache.tika.parser.pdf.PDFParser.java
License:Apache License
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { PDDocument pdfDocument = null; TemporaryResources tmp = new TemporaryResources(); //config from context, or default if not set via context PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig); String password = ""; try {/*from w w w. j av a 2 s.c o m*/ // PDFBox can process entirely in memory, or can use a temp file // for unpacked / processed resources // Decide which to do based on if we're reading from a file or not already TikaInputStream tstream = TikaInputStream.cast(stream); password = getPassword(metadata, context); if (tstream != null && tstream.hasFile()) { // File based, take that as a cue to use a temporary file RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw"); if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile, password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true); } } else { // Go for the normal, stream based in-memory parsing if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer(), password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true); } } metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted())); //if using the classic parser and the doc is encrypted, we must manually decrypt if (!localConfig.getUseNonSequentialParser() && pdfDocument.isEncrypted()) { pdfDocument.decrypt(password); } metadata.set(Metadata.CONTENT_TYPE, "application/pdf"); extractMetadata(pdfDocument, metadata); AccessChecker checker = localConfig.getAccessChecker(); checker.check(metadata); if (handler != null) { if (shouldHandleXFAOnly(pdfDocument, localConfig)) { 
handleXFAOnly(pdfDocument, handler, metadata); } else { PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } } } catch (CryptographyException e) { //seq parser throws CryptographyException for bad password throw new EncryptedDocumentException(e); } catch (IOException e) { //nonseq parser throws IOException for bad password //At the Tika level, we want the same exception to be thrown if (e.getMessage() != null && e.getMessage().contains("Error (CryptographyException)")) { metadata.set("pdf:encrypted", Boolean.toString(true)); throw new EncryptedDocumentException(e); } //rethrow any other IOExceptions throw e; } finally { if (pdfDocument != null) { pdfDocument.close(); } tmp.dispose(); //TODO: once we migrate to PDFBox 2.0, remove this (PDFBOX-2200) PDFont.clearResources(); } }
From source file:org.apache.tika.parser.pdf.PDFPureJavaParser.java
License:Apache License
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { PDFPureJavaParserConfig localConfig = context.get(PDFPureJavaParserConfig.class, defaultConfig); PDDocument pdfDocument = null; String password = ""; try {//from w w w . j a v a 2s . c o m // PDFBox can process entirely in memory, or can use a temp file // for unpacked / processed resources // Decide which to do based on if we're reading from a file or not already //TODO: make this configurable via MemoryUsageSetting TikaInputStream tstream = TikaInputStream.cast(stream); password = getPassword(metadata, context); if (tstream != null && tstream.hasFile()) { // File based -- send file directly to PDFBox pdfDocument = PDDocument.load(tstream.getPath().toFile(), password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), password); } metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(pdfDocument.isEncrypted())); metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString()); extractMetadata(pdfDocument, metadata, context); AccessChecker checker = localConfig.getAccessChecker(); checker.check(metadata); if (handler != null) { if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, handler, metadata, context); } else if (localConfig.getOcrStrategy().equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_ONLY)) { metadata.add("X-Parsed-By", "org.apache.tika.parser.ocr.TesseractOCRParser"); // No-ops. Do not support OCR parser. 
} else { if (localConfig.getOcrStrategy() .equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { metadata.add("X-Parsed-By", "org.apache.tika.parser.ocr.TesseractOCRParser"); } PDF2XHTMLPureJava.process(pdfDocument, handler, context, metadata, localConfig); } } } catch (InvalidPasswordException e) { metadata.set(PDF.IS_ENCRYPTED, "true"); throw new EncryptedDocumentException(e); } catch (final PdfTimeoutException e) { throw new TikaPdfTimeoutException("PdfTimeoutException", e); } finally { if (pdfDocument != null) { pdfDocument.close(); } } }
From source file:org.codelibs.robot.extractor.impl.PdfExtractor.java
License:Apache License
@Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) { throw new RobotSystemException("The inputstream is null."); }// w w w .j ava2s .c o m synchronized (pdfBoxLockObj) { PDDocument document = null; try { document = PDDocument.load(in, null, force); if (document.isEncrypted() && params != null) { String password = params.get(ExtractData.PDF_PASSWORD); if (password == null) { password = getPassword(params.get(ExtractData.URL), params.get(TikaMetadataKeys.RESOURCE_NAME_KEY)); } if (password != null) { final StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password); document.openProtection(sdm); final AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract text."); } } } final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final Writer output = new OutputStreamWriter(baos, encoding); final PDFTextStripper stripper = new PDFTextStripper(encoding); stripper.setForceParsing(force); final AtomicBoolean done = new AtomicBoolean(false); final PDDocument doc = document; final Set<Exception> exceptionSet = new HashSet<>(); final Thread task = new Thread(() -> { try { stripper.writeText(doc, output); } catch (final Exception e) { exceptionSet.add(e); } finally { done.set(true); } }); task.setDaemon(true); task.start(); task.join(timeout); if (!done.get()) { for (int i = 0; i < 100 && !done.get(); i++) { task.interrupt(); Thread.sleep(50); } throw new ExtractException("PDFBox process cannot finish in " + timeout + " sec."); } else if (!exceptionSet.isEmpty()) { throw exceptionSet.iterator().next(); } output.flush(); final ExtractData extractData = new ExtractData(baos.toString(encoding)); extractMetadata(document, extractData); return extractData; } catch (final Exception e) { throw new ExtractException(e); } finally { if (document != null) { try { document.close(); } catch (final IOException e) { 
// NOP } } } } }
From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxSource.java
License:Apache License
@NotNull protected static PDDocument openPdfDocument(@NotNull final File pdfFile, @Nullable final String password) { long t0 = System.currentTimeMillis(); MDC.put("doc", pdfFile.getName()); log.info("LOG00120:Opening PDF file " + pdfFile + "."); try {// w w w. j a va2 s . co m final PDDocument document = PDDocument.load(pdfFile); if (document.isEncrypted()) { if (password != null) { try { document.decrypt(password); } catch (Exception e) { throw new RuntimeException("Error while reading encrypted PDF:", e); } } else { log.warn("File claims to be encrypted, a password should be provided"); } } log.debug("load()took" + (System.currentTimeMillis() - t0) + "ms"); return document; } catch (IOException e) { MDC.put("doc", ""); throw new RuntimeException("Error while reading " + pdfFile + ".", e); } }
From source file:org.exoplatform.services.document.impl.PDFDocumentReader.java
License:Open Source License
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException { try {// w w w . j a v a 2 s.co m return SecurityHelper.doPrivilegedExceptionAction(new PrivilegedExceptionAction<Properties>() { public Properties run() throws Exception { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } PDDocument pdDocument = PDDocument.load(is); Properties props = new Properties(); try { if (pdDocument.isEncrypted()) { try { pdDocument.decrypt(""); } catch (InvalidPasswordException e) { throw new DocumentReadException("The pdf document is encrypted.", e); } catch (org.apache.pdfbox.exceptions.CryptographyException e) { throw new DocumentReadException(e.getMessage(), e); } } PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); PDMetadata meta = catalog.getMetadata(); if (meta != null) { XMPMetadata metadata = meta.exportXMPMetadata(); XMPSchemaDublinCore dc = metadata.getDublinCoreSchema(); if (dc != null) { try { if (dc.getTitle() != null) props.put(DCMetaData.TITLE, fixEncoding(dc.getTitle())); } catch (Exception e) { LOG.warn("getTitle failed: " + e.getMessage()); } try { if (dc.getDescription() != null) props.put(DCMetaData.DESCRIPTION, fixEncoding(dc.getDescription())); } catch (Exception e) { LOG.warn("getSubject failed: " + e.getMessage()); } try { if (dc.getCreators() != null) { for (String creator : dc.getCreators()) { props.put(DCMetaData.CREATOR, fixEncoding(creator)); } } } catch (Exception e) { LOG.warn("getCreator failed: " + e.getMessage()); } try { if (dc.getDates() != null) { for (Calendar date : dc.getDates()) { props.put(DCMetaData.DATE, date); } } } catch (Exception e) { LOG.warn("getDate failed: " + e.getMessage()); } } XMPSchemaPDF pdf = metadata.getPDFSchema(); if (pdf != null) { try { if (pdf.getKeywords() != null) props.put(DCMetaData.SUBJECT, fixEncoding(pdf.getKeywords())); } catch (Exception e) { LOG.warn("getKeywords failed: " + e.getMessage()); } try { if (pdf.getProducer() != null) 
props.put(DCMetaData.PUBLISHER, fixEncoding(pdf.getProducer())); } catch (Exception e) { LOG.warn("getProducer failed: " + e.getMessage()); } } XMPSchemaBasic basic = metadata.getBasicSchema(); if (basic != null) { try { if (basic.getCreateDate() != null) props.put(DCMetaData.DATE, basic.getCreateDate()); } catch (Exception e) { LOG.warn("getCreationDate failed: " + e.getMessage()); } try { if (basic.getModifyDate() != null) props.put(DCMetaData.DATE, basic.getModifyDate()); } catch (Exception e) { LOG.warn("getModificationDate failed: " + e.getMessage()); } // DCMetaData.PUBLISHER - basic.getCreatorTool() } } if (props.isEmpty()) { // The pdf doesn't contain any metadata, try to use the document // information instead PDDocumentInformation docInfo = pdDocument.getDocumentInformation(); if (docInfo != null) { try { if (docInfo.getAuthor() != null) props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor()); } catch (Exception e) { LOG.warn("getAuthor failed: " + e.getMessage()); } try { if (docInfo.getCreationDate() != null) props.put(DCMetaData.DATE, docInfo.getCreationDate()); } catch (Exception e) { LOG.warn("getCreationDate failed: " + e.getMessage()); } try { if (docInfo.getCreator() != null) props.put(DCMetaData.CREATOR, docInfo.getCreator()); } catch (Exception e) { LOG.warn("getCreator failed: " + e.getMessage()); } try { if (docInfo.getKeywords() != null) props.put(DCMetaData.SUBJECT, docInfo.getKeywords()); } catch (Exception e) { LOG.warn("getKeywords failed: " + e.getMessage()); } try { if (docInfo.getModificationDate() != null) props.put(DCMetaData.DATE, docInfo.getModificationDate()); } catch (Exception e) { LOG.warn("getModificationDate failed: " + e.getMessage()); } try { if (docInfo.getProducer() != null) props.put(DCMetaData.PUBLISHER, docInfo.getProducer()); } catch (Exception e) { LOG.warn("getProducer failed: " + e.getMessage()); } try { if (docInfo.getSubject() != null) props.put(DCMetaData.DESCRIPTION, docInfo.getSubject()); } catch (Exception e) 
{ LOG.warn("getSubject failed: " + e.getMessage()); } try { if (docInfo.getTitle() != null) props.put(DCMetaData.TITLE, docInfo.getTitle()); } catch (Exception e) { LOG.warn("getTitle failed: " + e.getMessage()); } // docInfo.getTrapped(); } } } finally { if (pdDocument != null) { pdDocument.close(); } if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } } return props; } }); } catch (PrivilegedActionException pae) { Throwable cause = pae.getCause(); if (cause instanceof IOException) { throw (IOException) cause; } else if (cause instanceof RuntimeException) { throw (RuntimeException) cause; } else { throw new RuntimeException(cause); } } }
From source file:org.github.jipsg.pdfbox.PDDocumentFactory.java
License:Apache License
/**
 * Create a PDFBox document.
 *
 * <p>Supported sources are {@link File}, {@link InputStream}, {@code DataSource},
 * {@code byte[]} and {@link String} (interpreted as a file path). An encrypted document is
 * decrypted with an empty password. Any input stream used for loading, including one passed
 * in by the caller, is closed before this method returns.
 *
 * @param source An opaque source
 * @return the document
 * @throws IOException the creation failed; this includes a {@code null} or unsupported
 *                     source and a failed decryption
 */
public PDDocument create(Object source) throws IOException {
    PDDocument result = null;
    InputStream is = null;
    String sourceName = "unknown";
    boolean created = false;

    try {
        if (source instanceof File) {
            File sourceFile = (File) source;
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile);
        } else if (source instanceof InputStream) {
            is = (InputStream) source;
            result = PDDocument.load(is, true);
        } else if (source instanceof DataSource) {
            is = ((DataSource) source).getInputStream();
            result = PDDocument.load(is, true);
        } else if (source instanceof byte[]) {
            is = new ByteArrayInputStream((byte[]) source);
            result = PDDocument.load(is, true);
        } else if (source instanceof String) {
            File sourceFile = new File((String) source);
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile.getAbsoluteFile());
        } else {
            throw new IllegalArgumentException("Don't know how to handle : " + describeType(source));
        }

        if (result.isEncrypted()) {
            result.decrypt("");
        }

        created = true;
        return result;
    } catch (Exception e) {
        // describeType is null-safe, so a null source is reported as an IOException
        // instead of escaping from this handler as a NullPointerException.
        String msg = "Parsing the PDF document failed : name=" + sourceName + ", type=" + describeType(source);
        throw new IOException(msg, e);
    } finally {
        // A document that was loaded but is not handed to the caller must not leak.
        if (!created && result != null) {
            try {
                result.close();
            } catch (IOException ignored) {
                // the original failure is the one worth reporting
            }
        }
        if (is != null) {
            is.close();
        }
    }
}

/** Returns the class name of {@code source}, or the text "null" when there is no source. */
private static String describeType(Object source) {
    return source == null ? "null" : source.getClass().getName();
}