List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#isEncrypted()
public boolean isEncrypted()
From source file:org.opencps.util.ExtractTextLocations.java
License:Open Source License
public ExtractTextLocations(String fullPath) throws IOException { PDDocument document = null; try {//from ww w . ja v a 2 s . c om File input = new File(fullPath); document = PDDocument.load(input); if (document.isEncrypted()) { try { document.decrypt(StringPool.BLANK); } catch (Exception e) { _log.error(e); } } // ExtractTextLocations printer = new ExtractTextLocations(); List allPages = document.getDocumentCatalog().getAllPages(); if (allPages != null && allPages.size() > 0) { PDPage page = (PDPage) allPages.get(0); PDStream contents = page.getContents(); if (contents != null) { this.processStream(page, page.findResources(), page.getContents().getStream()); } PDRectangle pageSize = page.findMediaBox(); if (pageSize != null) { setPageWidth(pageSize.getWidth()); setPageHeight(pageSize.getHeight()); setPageLLX(pageSize.getLowerLeftX()); setPageURX(pageSize.getUpperRightX()); setPageLLY(pageSize.getLowerLeftY()); setPageURY(pageSize.getUpperRightY()); } } } catch (Exception e) { _log.error(e); } finally { if (document != null) { document.close(); } } }
From source file:org.paxle.parser.pdf.impl.PdfParser.java
License:Open Source License
public IParserDocument parse(URI location, String charset, InputStream fileIn) throws ParserException, UnsupportedEncodingException, IOException { IParserDocument parserDoc = null;// w w w.ja v a 2 s. c om PDDocument pddDoc = null; try { final IParserContext pc = this.contextLocal.getCurrentContext(); final ICommandProfile cmdProfile = pc.getCommandProfile(); // create an empty document parserDoc = pc.createDocument(); // parse it final PDFParser parser = new PDFParser(fileIn); parser.parse(); pddDoc = parser.getPDDocument(); // check document encryption if (pddDoc.isEncrypted()) { if (this.logger.isDebugEnabled()) { this.logger.debug(String.format("Document '%s' is encrypted.", location)); } // determine the decryption password String pwd = ""; if (cmdProfile != null) { String tmp = (String) cmdProfile.getProperty("org.paxle.parser.pdf.impl.decryptionPassword"); if (tmp != null) pwd = tmp; } // try to open document with the given password try { final StandardDecryptionMaterial dm = new StandardDecryptionMaterial(pwd); pddDoc.openProtection(dm); final AccessPermission accessPermission = pddDoc.getCurrentAccessPermission(); if (accessPermission == null || !accessPermission.canExtractContent()) { if (this.logger.isInfoEnabled()) { this.logger.debug( String.format("No permission to extract content of document '%s'.", location)); } parserDoc.setStatus(IParserDocument.Status.FAILURE, "PDF Document is encrypted."); return parserDoc; } } catch (Throwable e) { this.logger.error(String.format("Unable to decrypt document '%s'.", location), e); parserDoc.setStatus(IParserDocument.Status.FAILURE, String .format("Unable to decrypt document. %s: %s", e.getClass().getName(), e.getMessage())); return parserDoc; } } // extract metadata this.extractMetaData(parserDoc, pddDoc); // extract text final PDFTextStripper stripper = new PDFTextStripper(); // XXX: we could limit the amount of parsed pages via crawling-profile properties? 
// stripper.setStartPage(startPageValue); // stripper.setEndPage(endPageValue); final Writer pdocWriter = parserDoc.getTextWriter(); stripper.writeText(pddDoc, pdocWriter); pdocWriter.flush(); // extracting URIs this.extractURLs(parserDoc, pddDoc); // extracting embedded files this.extractEmbeddedFiles(location, parserDoc, pddDoc); parserDoc.setStatus(IParserDocument.Status.OK); return parserDoc; } catch (Throwable e) { throw new ParserException("Error parsing pdf document. " + e.getMessage(), e); } finally { if (pddDoc != null) try { pddDoc.close(); } catch (Exception e) { this.logger.error(e); } } }
From source file:org.pdfgal.pdfgal.validator.impl.PDFGalValidatorImpl.java
License:Open Source License
@Override public boolean isEncrypted(final String path) { boolean result = false; PDDocument document; try {// www . j a va 2 s. c o m document = PDDocument.load(path); result = document.isEncrypted(); document.close(); } catch (final IOException e) { result = false; } return result; }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Save Outlines (bookmarks).// ww w . ja v a 2 s .c om * * @param pdfFile * Source PDF file. * @param outlinesFile * File with Outlines (bookmarks) in user-frendly format. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.java?view=markup */ public static void saveOutlines(final File pdfFile, final File outlinesFile) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Get data from PDF file. final PDDocumentCatalog catalog = document.getDocumentCatalog(); final PDDocumentOutline outlines = catalog.getDocumentOutline(); final PDPageTree pages = catalog.getPages(); final PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(catalog); final PDDestinationNameTreeNode destinations = namesDictionary.getDests(); // Convert. final List<String> lines = OutlineHelper.outlinesToLineList(outlines, pages, destinations); // Write line list into the text file. Files.write(outlinesFile.toPath(), lines); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Update Outlines (bookmarks)./*from ww w. j a v a 2s . com*/ * * @param pdfFile * Source PDF file. * @param outlinesFile * File with Outlines (bookmarks) in user-frendly format. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.java?view=markup */ public static void updateOutlines(final File pdfFile, final File outlinesFile) throws IOException { // Read bookmark list from text file. final List<String> lines = Files.readAllLines(outlinesFile.toPath()); PDDocument document = null; try { // Open PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Get data from PDF file. final PDDocumentCatalog catalog = document.getDocumentCatalog(); final PDPageTree pages = catalog.getPages(); // Convert. final PDDocumentOutline outlines = OutlineHelper.lineListToOutlines(pages, lines); // Set outlines. catalog.setDocumentOutline(outlines); // Create temporary PDF file for result. if (TEMP_PDF.exists()) { TEMP_PDF.delete(); } // Save result to temporary PDF file. document.save(TEMP_PDF); // Replace original PDF file. pdfFile.delete(); Files.move(Paths.get(TEMP_PDF.toURI()), Paths.get(pdfFile.toURI())); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Save Metadata.//from ww w .ja v a 2s.c om * * @param pdfFile * Source PDF file. * @param metadataFile * File with Metadata in user-frendly format. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java?view=markup */ public static void saveMetadata(final File pdfFile, final File metadataFile) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Get data from PDF file. final PDDocumentInformation information = document.getDocumentInformation(); // Convert. final List<String> lines = MetadataHelper.metadataToLineList(information); // Write line list into the text file. Files.write(metadataFile.toPath(), lines); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Update Metadata.// w w w .j a v a 2 s. c om * * @param pdfFile * Source PDF file. * @param metadataFile * File with Metadata in user-frendly format. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java?view=markup */ public static void updateMetadata(final File pdfFile, final File metadataFile) throws IOException { // Read bookmark list from text file. final List<String> lines = Files.readAllLines(metadataFile.toPath()); PDDocument document = null; try { // Open PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Convert. final PDDocumentInformation information = MetadataHelper.stringListToMetadata(lines); // Set Metadata. document.setDocumentInformation(information); // Create temporary PDF file for result. if (TEMP_PDF.exists()) { TEMP_PDF.delete(); } // Save result to temporary PDF file. document.save(TEMP_PDF); // Replace original PDF file. pdfFile.delete(); Files.move(Paths.get(TEMP_PDF.toURI()), Paths.get(pdfFile.toURI())); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Save all Attached (embedded) files to some directory. * // w w w .j av a 2s .c o m * @param pdfFile * Source PDF file. * @param outputDir * Target directory. * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java?view=markup */ public static void saveAttachments(final File pdfFile, final File outputDir) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Extract Embedded (attached) files. final PDDocumentNameDictionary documentNameDictionary = new PDDocumentNameDictionary( document.getDocumentCatalog()); final PDEmbeddedFilesNameTreeNode embeddedFilesNameTree = documentNameDictionary.getEmbeddedFiles(); if (embeddedFilesNameTree != null) { extractFiles(outputDir, embeddedFilesNameTree.getNames()); final List<PDNameTreeNode<PDComplexFileSpecification>> kids = embeddedFilesNameTree.getKids(); if (kids != null) { for (PDNameTreeNode<PDComplexFileSpecification> nameTreeNode : kids) { extractFiles(outputDir, nameTreeNode.getNames()); } } } // Extract Embedded (attached) from annotations. for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { final PDAnnotationFileAttachment fileAttach = (PDAnnotationFileAttachment) annotation; final PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fileAttach .getFile(); extractFile(outputDir, fileSpec); } } } } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Remove all Attached (embedded) files. * /*from w w w .j av a 2 s . c o m*/ * @param pdfFile * Source PDF file. * @throws IOException */ public static void removeAttachments(final File pdfFile) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Clean the tree to the document catalog. document.getDocumentCatalog().setNames(null); // Create temporary PDF file for result. if (TEMP_PDF.exists()) { TEMP_PDF.delete(); } // Save result to temporary PDF file. document.save(TEMP_PDF); // Replace original PDF file. pdfFile.delete(); Files.move(Paths.get(TEMP_PDF.toURI()), Paths.get(pdfFile.toURI())); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfmetamodifier.IOHelper.java
License:Apache License
/** * Add new Attached (embedded) files./* w w w. j av a2s . c om*/ * * @param pdfFile * Source PDF file. * @param attachmentFiles * Files that will be attached (embedded). * @throws IOException */ /* * See: * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.java?view=markup */ public static void addAttachments(final File pdfFile, final List<File> attachmentFiles) throws IOException { PDDocument document = null; try { // Read PDF file. document = PDDocument.load(pdfFile); if (document.isEncrypted()) { throw new IOException("Document is encrypted."); } // Embedded (attached) files are stored in a named tree. final PDEmbeddedFilesNameTreeNode root = new PDEmbeddedFilesNameTreeNode(); final List<PDEmbeddedFilesNameTreeNode> kids = new ArrayList<PDEmbeddedFilesNameTreeNode>(); root.setKids(kids); // Add the tree to the document catalog. final PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary( document.getDocumentCatalog()); namesDictionary.setEmbeddedFiles(root); document.getDocumentCatalog().setNames(namesDictionary); // For all Embedded (attached) files. for (File file : attachmentFiles) { final String filename = file.getName(); // First create the file specification, which holds the Embedded (attached) file. final PDComplexFileSpecification complexFileSpecification = new PDComplexFileSpecification(); complexFileSpecification.setFile(filename); // Create a dummy file stream, this would probably normally be a FileInputStream. final ByteArrayInputStream fileStream = new ByteArrayInputStream(Files.readAllBytes(file.toPath())); final PDEmbeddedFile embededFile = new PDEmbeddedFile(document, fileStream); complexFileSpecification.setEmbeddedFile(embededFile); // Create a new tree node and add the Embedded (attached) file. 
final PDEmbeddedFilesNameTreeNode embeddedFilesNameTree = new PDEmbeddedFilesNameTreeNode(); embeddedFilesNameTree.setNames(Collections.singletonMap(filename, complexFileSpecification)); // Add the new node as kid to the root node. kids.add(embeddedFilesNameTree); } // Create temporary PDF file for result. if (TEMP_PDF.exists()) { TEMP_PDF.delete(); } // Save result to temporary PDF file. document.save(TEMP_PDF); // Replace original PDF file. pdfFile.delete(); Files.move(Paths.get(TEMP_PDF.toURI()), Paths.get(pdfFile.toURI())); } finally { if (document != null) { document.close(); } } }