List of usage examples for org.apache.pdfbox.pdmodel PDDocument setAllSecurityToBeRemoved
public void setAllSecurityToBeRemoved(boolean removeAllSecurity)
From source file:ReducePDFSize.java
License:Apache License
public static void main(String[] args) throws IOException { if (2 != args.length) { throw new RuntimeException("arg0 must be input file, org1 must be output file"); }//from ww w. j a v a 2s . c o m String in = args[0]; String out = args[1]; PDDocument doc = null; try { doc = PDDocument.load(new File(in)); doc.setAllSecurityToBeRemoved(true); for (COSObject cosObject : doc.getDocument().getObjects()) { COSBase base = cosObject.getObject(); // if it's a stream: decode it, then re-write it using FLATE_DECODE if (base instanceof COSStream) { COSStream stream = (COSStream) base; byte[] bytes; try { bytes = new PDStream(stream).toByteArray(); } catch (IOException ex) { // NOTE: original example code from PDFBox just logged & "continue;"d here, 'skipping' this stream. // If this type of failure ever happens, we can (perhaps) consider (re)ignoring this type of failure? // // IIUC then that will leave the original (non-decoded / non-flated) stream in place? throw new RuntimeException("can't serialize byte[] from: " + cosObject.getObjectNumber() + " " + cosObject.getGenerationNumber() + " obj: " + ex.getMessage(), ex); } stream.removeItem(COSName.FILTER); OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE); streamOut.write(bytes); streamOut.close(); } } doc.getDocumentCatalog(); doc.save(out); } finally { if (doc != null) { doc.close(); } } }
From source file:com.ackpdfbox.app.Decrypt.java
License:Apache License
private void decrypt() throws IOException { PDDocument document = null; try {//from www.ja v a 2s . com InputStream keyStoreStream = null; if (keyStore != null) { keyStoreStream = new FileInputStream(keyStore); } document = PDDocument.load(new File(infile), password, keyStoreStream, alias); if (document.isEncrypted()) { AccessPermission ap = document.getCurrentAccessPermission(); if (ap.isOwnerPermission()) { document.setAllSecurityToBeRemoved(true); document.save(outfile); } else { throw new IOException( "Error: You are only allowed to decrypt a document with the owner password."); } } else { System.err.println("Error: Document is not encrypted."); } } finally { if (document != null) { document.close(); } } }
From source file:com.openkm.extractor.PdfTextExtractor.java
License:Open Source License
/** * {@inheritDoc}//from www.j a v a2 s . c om */ @SuppressWarnings("rawtypes") public String extractText(InputStream stream, String type, String encoding) throws IOException { try { PDFParser parser = new PDFParser(new BufferedInputStream(stream)); try { parser.parse(); PDDocument document = parser.getPDDocument(); if (document.isEncrypted()) { try { document.decrypt(""); document.setAllSecurityToBeRemoved(true); } catch (Exception e) { throw new IOException("Unable to extract text: document encrypted", e); } } CharArrayWriter writer = new CharArrayWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.writeText(document, writer); String st = writer.toString().trim(); log.debug("TextStripped: '{}'", st); if (Config.SYSTEM_PDF_FORCE_OCR || st.length() <= 1) { log.warn("PDF does not contains text layer"); // Extract images from PDF StringBuilder sb = new StringBuilder(); if (!Config.SYSTEM_PDFIMAGES.isEmpty()) { File tmpPdf = FileUtils.createTempFile("pdf"); File tmpDir = new File(EnvironmentDetector.getTempDir()); String baseName = FileUtils.getFileName(tmpPdf.getName()); document.save(tmpPdf); int pgNum = 1; try { for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) { HashMap<String, Object> hm = new HashMap<String, Object>(); hm.put("fileIn", tmpPdf.getPath()); hm.put("firstPage", pgNum); hm.put("lastPage", pgNum++); hm.put("imageRoot", tmpDir + File.separator + baseName); String cmd = TemplateUtils.replace("SYSTEM_PDFIMAGES", Config.SYSTEM_PDFIMAGES, hm); ExecutionUtils.runCmd(cmd); for (File tmp : tmpDir.listFiles()) { if (tmp.getName().startsWith(baseName + "-")) { if (page.findRotation() > 0) { ImageUtils.rotate(tmp, tmp, page.findRotation()); } try { String txt = doOcr(tmp); sb.append(txt).append(" "); log.debug("OCR Extracted: {}", txt); } finally { FileUtils.deleteQuietly(tmp); } } } } } finally { FileUtils.deleteQuietly(tmpPdf); } } else { for (PDPage page : (List<PDPage>) 
document.getDocumentCatalog().getAllPages()) { PDResources resources = page.getResources(); Map<String, PDXObject> images = resources.getXObjects(); if (images != null) { for (String key : images.keySet()) { PDXObjectImage image = (PDXObjectImage) images.get(key); String prefix = "img-" + key + "-"; File pdfImg = null; try { pdfImg = File.createTempFile(prefix, ".png"); log.debug("Writing image: {}", pdfImg.getPath()); // Won't work until PDFBox 1.8.9 ImageIO.write(image.getRGBImage(), "png", pdfImg); if (page.findRotation() > 0) { ImageUtils.rotate(pdfImg, pdfImg, page.findRotation()); } // Do OCR String txt = doOcr(pdfImg); sb.append(txt).append(" "); log.debug("OCR Extracted: {}", txt); } finally { FileUtils.deleteQuietly(pdfImg); } } } } } return sb.toString(); } else { return writer.toString(); } } finally { try { PDDocument doc = parser.getPDDocument(); if (doc != null) { doc.close(); } } catch (IOException e) { // ignore } } } catch (Exception e) { // it may happen that PDFParser throws a runtime // exception when parsing certain pdf documents log.warn("Failed to extract PDF text content", e); throw new IOException(e.getMessage(), e); } finally { stream.close(); } }
From source file:com.sustainalytics.crawlerfilter.PDFTitleGeneration.java
License:Apache License
/**
 * Extracts the creation date of a PDF file, falling back to its {@code "customdate"} metadata
 * entry (expected as {@code MM/dd/yyyy}) when no creation date is present.
 *
 * @param file the PDF file to inspect
 * @return the date as {@code year-month-day} (numbers are not zero padded), or an empty string
 *         when the file cannot be processed or carries no usable date
 */
public static String extractDate(File file) {
    String creationDateMetaData = "";
    PDDocument document = null;

    try {
        document = PDDocument.load(file.toString());

        if (document.isEncrypted()) {
            logger.info("File " + file.getName() + "is encrypted. Trying to decrypt...");
            try {
                // Work-around for encrypted files: try the empty password.
                document.decrypt("");
                document.setAllSecurityToBeRemoved(true);
                logger.info("File " + file.getName() + "successfully decrypted!");
            } catch (CryptographyException e) {
                // Metadata extraction is still attempted below, as before.
                logger.info("Error decrypting file " + file.getName());
            }
        }

        PDDocumentInformation info = document.getDocumentInformation();
        Calendar calendar = info.getCreationDate();

        if (calendar == null) {
            // No creation date: fall back to the custom date, which may be absent or malformed.
            String rawCustomDate = info.getCustomMetadataValue("customdate");
            if (rawCustomDate != null) {
                try {
                    Date customDate = new SimpleDateFormat("MM/dd/yyyy").parse(rawCustomDate);
                    calendar = Calendar.getInstance();
                    calendar.setTime(customDate);
                } catch (ParseException e) {
                    logger.info("Error parsing date from custom date");
                }
            }
        }

        if (calendar != null) {
            int creationYear = calendar.get(Calendar.YEAR);
            int creationMonth = calendar.get(Calendar.MONTH) + 1; // Calendar months are 0-based
            int creationDate = calendar.get(Calendar.DATE);
            if (creationYear != 0) {
                creationDateMetaData = creationYear + "-" + creationMonth + "-" + creationDate;
            }
        }
    } catch (IOException e) {
        logger.info("Error processing file " + file.getName());
    } finally {
        // Close whenever a document was actually opened, including after a failed decryption
        // or a failure while reading the metadata.
        if (document != null) {
            try {
                document.close();
                logger.info("File " + file.getName() + " is closed successfully!");
            } catch (IOException e) {
                logger.info("Error closing file " + file.getName());
            }
        }
    }

    return creationDateMetaData;
}
From source file:com.tekstosense.segmenter.Main.java
License:Open Source License
private TextExtractor parsePdf(File f) throws IOException { PDDocument doc = PDDocument.load(f); if (doc.isEncrypted()) { // Some documents are encrypted with the empty password. Try // to decrypt with this password, or the one passed in on the // command line (if any), and fail if we can't. try {//from ww w . j a v a 2 s.c o m doc.setAllSecurityToBeRemoved(false); //doc.decrypt(password); // Defaults to the empty string. } catch (Exception e) { throw new IOException("Can't decrypt document: ", e); } } TextExtractor te = new TextExtractor(); te.writeText(doc, new OutputStreamWriter(new ByteArrayOutputStream())); return te; }
From source file:com.tekstosense.segmenter.StructurePdf.PdfSections.java
License:Open Source License
private TextExtractor parsePdf(File f) throws IOException { PDDocument doc = PDDocument.load(f); if (doc.isEncrypted()) { // Some documents are encrypted with the empty password. Try // to decrypt with this password, or the one passed in on the // command line (if any), and fail if we can't. try {/*from w ww. j a va2s .com*/ doc.setAllSecurityToBeRemoved(false); // doc.decrypt(password); // Defaults to the empty string. } catch (Exception e) { throw new IOException("Can't decrypt document: ", e); } } TextExtractor te = new TextExtractor(); te.writeText(doc, new OutputStreamWriter(new ByteArrayOutputStream())); return te; }
From source file:merge_split.MergeSplit.java
License:Apache License
private void AddButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_AddButtonActionPerformed String fileName;/*from w w w. j av a2s .c o m*/ int returnVal = jFileChooser1.showOpenDialog((Component) evt.getSource()); if (returnVal == JFileChooser.APPROVE_OPTION) { File file = jFileChooser1.getSelectedFile(); fileName = file.toString(); PDDocument doc = null; String code = ""; try { doc = PDDocument.load(file); if (doc.isEncrypted()) { doc.setAllSecurityToBeRemoved(true); } } catch (IOException ex) { } if (doc == null) { JFrame frame = new JFrame("Input Dialog Example 3"); code = JOptionPane.showInputDialog(frame, "Enter password", "PDF is encrypted", JOptionPane.WARNING_MESSAGE); try { doc = PDDocument.load(file, code); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Wrong Password.", "Wrong Password", JOptionPane.WARNING_MESSAGE); } } if (doc != null) { int count = doc.getNumberOfPages(); String currentpages; if (count > 1) { currentpages = "1 - " + count; } else { currentpages = "1"; } boolean isOriginalDocEncrypted = doc.isEncrypted(); String column4; if (isOriginalDocEncrypted) { column4 = code; } else { column4 = "ok"; } dtm.addRow(new Object[] { fileName, count, currentpages, column4 }); try { doc.close(); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Problem accessing file.", "Problem accessing file", JOptionPane.WARNING_MESSAGE); } arr.add(file); } } else { System.out.println("File access cancelled by user."); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void MergeButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_MergeButtonActionPerformed try {/*from w w w. j a v a 2 s . c om*/ PDDocument samplePdf = new PDDocument(); ArrayList<PDDocument> list = new ArrayList<>(); for (int i = 0; i < dtm.getRowCount(); i++) { File file = new File((String) dtm.getValueAt(i, 0)); String code = (String) dtm.getValueAt(i, 3); PDDocument doc1; if (code.equals("ok")) { doc1 = PDDocument.load(file); } else { doc1 = PDDocument.load(file, code); } list.add(doc1); doc1.setAllSecurityToBeRemoved(true); TreeSet tree = findPages((String) dtm.getValueAt(i, 2)); for (int j = 0; j < doc1.getNumberOfPages(); j++) { if (tree.contains(j + 1)) { samplePdf.addPage(doc1.getPage(j)); } } } System.out.println("Number:" + samplePdf.getNumberOfPages()); String destination = jTextField1.getText() + "\\" + jTextField2.getText() + ".pdf"; PDDocumentInformation info = samplePdf.getDocumentInformation(); info.setAuthor(jTextField3.getText()); File output = new File(destination); samplePdf.save(output); samplePdf.close(); for (int i = 0; i < list.size(); i++) { list.get(i).close(); } } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.", "Input warning", JOptionPane.WARNING_MESSAGE); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void RotateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButtonActionPerformed try {//from w ww.j av a 2s.com PDDocument samplePdf = new PDDocument(); File file = new File(RotateFileField.getText()); PDDocument doc1; if (rotatecode.equals("ok")) { doc1 = PDDocument.load(file); } else { doc1 = PDDocument.load(file, rotatecode); } doc1.setAllSecurityToBeRemoved(true); TreeSet tree = findPages(RotatePagesField.getText()); for (int j = 0; j < doc1.getNumberOfPages(); j++) { PDPage page = doc1.getPage(j); if (tree.contains(j + 1)) { if (Rotate90.isSelected()) { page.setRotation(90); samplePdf.addPage(page); } else if (Rotate180.isSelected()) { page.setRotation(180); samplePdf.addPage(page); } else if (Rotate270.isSelected()) { page.setRotation(270); samplePdf.addPage(page); } } else { samplePdf.addPage(page); } } System.out.println("Number:" + samplePdf.getNumberOfPages()); String destination = RotateDestinationField.getText() + "\\" + RotateNameField.getText() + ".pdf"; PDDocumentInformation info = samplePdf.getDocumentInformation(); info.setAuthor(RotateAuthorField.getText()); File output = new File(destination); samplePdf.save(output); samplePdf.close(); } catch (IOException ex) { Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex); JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.", "Input warning", JOptionPane.WARNING_MESSAGE); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void RotateFileButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateFileButtonActionPerformed String fileName;//from w w w .j ava 2 s.c o m int returnVal = jFileChooser1.showOpenDialog((Component) evt.getSource()); if (returnVal == JFileChooser.APPROVE_OPTION) { File file = jFileChooser1.getSelectedFile(); fileName = file.toString(); PDDocument doc = null; try { doc = PDDocument.load(file); if (doc.isEncrypted()) { doc.setAllSecurityToBeRemoved(true); } } catch (IOException ex) { } rotatecode = ""; if (doc == null) { JFrame frame = new JFrame("Input Dialog Example 3"); rotatecode = JOptionPane.showInputDialog(frame, "Enter password", "PDF is encrypted", JOptionPane.WARNING_MESSAGE); try { doc = PDDocument.load(file, rotatecode); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Wrong Password.", "Wrong Password", JOptionPane.WARNING_MESSAGE); } } if (doc != null) { int count = doc.getNumberOfPages(); String currentpages; if (count > 1) { currentpages = "1 - " + count; } else { currentpages = "1"; } RotatePagesField.setText(currentpages); RotateFileField.setText(fileName); String name = file.getName(); int pos = name.lastIndexOf("."); if (pos > 0) { name = name.substring(0, pos); } name = name + "Rotated"; RotateNameField.setText(name); try { doc.close(); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Problem finishing process.", "Problem finishing process", JOptionPane.WARNING_MESSAGE); } } } else { System.out.println("File access cancelled by user."); } }