Example usage for org.apache.pdfbox.pdmodel PDDocument setAllSecurityToBeRemoved

List of usage examples for org.apache.pdfbox.pdmodel PDDocument setAllSecurityToBeRemoved

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument setAllSecurityToBeRemoved.

Prototype

public void setAllSecurityToBeRemoved(boolean removeAllSecurity) 

Source Link

Document

Activates/Deactivates the removal of all security when writing the pdf.

Usage

From source file:ReducePDFSize.java

License:Apache License

/**
 * Loads a PDF, rewrites every stream object using the Flate filter and saves the result with
 * all security removed.
 *
 * @param args {@code args[0]} is the input file path, {@code args[1]} is the output file path
 * @throws IOException      if the document cannot be loaded, rewritten or saved
 * @throws RuntimeException if the number of arguments is not exactly two, or if the bytes of a
 *                          stream cannot be read
 */
public static void main(String[] args) throws IOException {
    if (2 != args.length) {
        throw new RuntimeException("arg0 must be input file, arg1 must be output file");
    }
    String in = args[0];
    String out = args[1];
    PDDocument doc = null;

    try {
        doc = PDDocument.load(new File(in));
        doc.setAllSecurityToBeRemoved(true);
        for (COSObject cosObject : doc.getDocument().getObjects()) {
            COSBase base = cosObject.getObject();
            // Only stream objects are rewritten; every other object is left untouched.
            if (base instanceof COSStream) {
                COSStream stream = (COSStream) base;
                byte[] bytes;
                try {
                    bytes = new PDStream(stream).toByteArray();
                } catch (IOException ex) {
                    // NOTE: the original PDFBox example logged the failure and skipped the stream
                    // (leaving it as it was). Here the failure is fatal; revisit if it ever
                    // happens in practice.
                    throw new RuntimeException("can't serialize byte[] from: " + cosObject.getObjectNumber()
                            + " " + cosObject.getGenerationNumber() + " obj: " + ex.getMessage(), ex);
                }
                stream.removeItem(COSName.FILTER);
                OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE);
                try {
                    streamOut.write(bytes);
                } finally {
                    // Close even when the write fails so the stream is not left open.
                    streamOut.close();
                }
            }
        }
        // Result intentionally unused; kept from the original example.
        // NOTE(review): presumably this makes sure the catalog exists before saving - confirm.
        doc.getDocumentCatalog();
        doc.save(out);
    } finally {
        if (doc != null) {
            doc.close();
        }
    }
}

From source file:com.ackpdfbox.app.Decrypt.java

License:Apache License

/**
 * Loads {@code infile} using the configured password / key store / alias and, when the document
 * is encrypted and was opened with owner permission, saves it to {@code outfile} with all
 * security removed. A document that is not encrypted is only reported on standard error.
 *
 * @throws IOException if the document cannot be loaded or saved, or if it was not opened with
 *                     owner permission
 */
private void decrypt() throws IOException {
    PDDocument document = null;
    InputStream keyStoreStream = null;
    try {
        if (keyStore != null) {
            keyStoreStream = new FileInputStream(keyStore);
        }

        document = PDDocument.load(new File(infile), password, keyStoreStream, alias);

        if (document.isEncrypted()) {
            AccessPermission ap = document.getCurrentAccessPermission();
            if (ap.isOwnerPermission()) {
                document.setAllSecurityToBeRemoved(true);
                document.save(outfile);
            } else {
                throw new IOException(
                        "Error: You are only allowed to decrypt a document with the owner password.");
            }
        } else {
            System.err.println("Error: Document is not encrypted.");
        }
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            // The key store stream was opened here, so it is released here as well.
            if (keyStoreStream != null) {
                try {
                    keyStoreStream.close();
                } catch (IOException ignored) {
                    // A failure to close a read-only stream must not mask the real outcome.
                }
            }
        }
    }
}

From source file:com.openkm.extractor.PdfTextExtractor.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>The text layer is read with {@link PDFTextStripper}. When OCR is forced through
 * {@code Config.SYSTEM_PDF_FORCE_OCR}, or the stripped text is at most one character long, the
 * text is obtained by OCR instead: either from images produced by the external command
 * configured in {@code Config.SYSTEM_PDFIMAGES}, or, when that setting is empty, from the image
 * XObjects of every page.
 *
 * @throws IOException if parsing, decryption, extraction or OCR fails for any reason
 */
@SuppressWarnings("rawtypes")
public String extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        PDFParser parser = new PDFParser(new BufferedInputStream(stream));

        try {
            parser.parse();
            PDDocument document = parser.getPDDocument();

            if (document.isEncrypted()) {
                try {
                    // Only documents protected with the empty password can be opened here.
                    document.decrypt("");
                    document.setAllSecurityToBeRemoved(true);
                } catch (Exception e) {
                    throw new IOException("Unable to extract text: document encrypted", e);
                }
            }

            CharArrayWriter writer = new CharArrayWriter();
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setLineSeparator("\n");
            stripper.writeText(document, writer);
            String st = writer.toString().trim();
            log.debug("TextStripped: '{}'", st);

            if (Config.SYSTEM_PDF_FORCE_OCR || st.length() <= 1) {
                log.warn("PDF does not contains text layer");

                // Extract images from PDF
                StringBuilder sb = new StringBuilder();

                if (!Config.SYSTEM_PDFIMAGES.isEmpty()) {
                    File tmpPdf = FileUtils.createTempFile("pdf");
                    File tmpDir = new File(EnvironmentDetector.getTempDir());
                    String baseName = FileUtils.getFileName(tmpPdf.getName());
                    document.save(tmpPdf);
                    int pgNum = 1;

                    try {
                        for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) {
                            HashMap<String, Object> hm = new HashMap<String, Object>();
                            hm.put("fileIn", tmpPdf.getPath());
                            hm.put("firstPage", pgNum);
                            hm.put("lastPage", pgNum++);
                            hm.put("imageRoot", tmpDir + File.separator + baseName);
                            String cmd = TemplateUtils.replace("SYSTEM_PDFIMAGES", Config.SYSTEM_PDFIMAGES, hm);
                            ExecutionUtils.runCmd(cmd);

                            // listFiles() returns null when the directory cannot be listed.
                            File[] candidates = tmpDir.listFiles();
                            if (candidates == null) {
                                continue;
                            }

                            for (File tmp : candidates) {
                                // Only files written for this PDF ("<baseName>-...") are processed.
                                if (tmp.getName().startsWith(baseName + "-")) {
                                    if (page.findRotation() > 0) {
                                        ImageUtils.rotate(tmp, tmp, page.findRotation());
                                    }

                                    try {
                                        String txt = doOcr(tmp);
                                        sb.append(txt).append(" ");
                                        log.debug("OCR Extracted: {}", txt);
                                    } finally {
                                        FileUtils.deleteQuietly(tmp);
                                    }
                                }
                            }
                        }
                    } finally {
                        FileUtils.deleteQuietly(tmpPdf);
                    }
                } else {
                    for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) {
                        PDResources resources = page.getResources();
                        // A page may carry no resources of its own.
                        Map<String, PDXObject> images = (resources != null) ? resources.getXObjects() : null;

                        if (images != null) {
                            for (Map.Entry<String, PDXObject> entry : images.entrySet()) {
                                // XObjects that are not images cannot be rendered for OCR; skip them
                                // instead of failing the whole extraction on the cast.
                                if (!(entry.getValue() instanceof PDXObjectImage)) {
                                    continue;
                                }

                                PDXObjectImage image = (PDXObjectImage) entry.getValue();
                                String prefix = "img-" + entry.getKey() + "-";
                                File pdfImg = null;

                                try {
                                    pdfImg = File.createTempFile(prefix, ".png");
                                    log.debug("Writing image: {}", pdfImg.getPath());

                                    // Won't work until PDFBox 1.8.9
                                    ImageIO.write(image.getRGBImage(), "png", pdfImg);

                                    if (page.findRotation() > 0) {
                                        ImageUtils.rotate(pdfImg, pdfImg, page.findRotation());
                                    }

                                    // Do OCR
                                    String txt = doOcr(pdfImg);
                                    sb.append(txt).append(" ");
                                    log.debug("OCR Extracted: {}", txt);
                                } finally {
                                    FileUtils.deleteQuietly(pdfImg);
                                }
                            }
                        }
                    }
                }

                return sb.toString();
            } else {
                // Note: the untrimmed text is returned; the trimmed copy is only used for the check.
                return writer.toString();
            }
        } finally {
            try {
                PDDocument doc = parser.getPDDocument();
                if (doc != null) {
                    doc.close();
                }
            } catch (IOException e) {
                // ignore
            }
        }
    } catch (Exception e) {
        // it may happen that PDFParser throws a runtime
        // exception when parsing certain pdf documents
        log.warn("Failed to extract PDF text content", e);
        throw new IOException(e.getMessage(), e);
    } finally {
        stream.close();
    }
}

From source file:com.sustainalytics.crawlerfilter.PDFTitleGeneration.java

License:Apache License

/**
 * Extracts the creation date of a PDF file, falling back to the custom metadata entry
 * {@code customdate} (expected as {@code MM/dd/yyyy}) when no creation date is present.
 *
 * @param file the PDF file to inspect
 * @return the date as {@code year-month-day} (month and day are not zero-padded), or an empty
 *         string when the file cannot be read or decrypted, or when no usable date is found
 */
public static String extractDate(File file) {
    PDDocument document = null;
    String creationDateMetaData = "";
    try {
        document = PDDocument.load(file.toString());

        if (document.isEncrypted()) {
            logger.info("File " + file.getName() + " is encrypted. Trying to decrypt...");
            try {
                // Only documents protected with the empty password can be opened here.
                document.decrypt("");
                document.setAllSecurityToBeRemoved(true);
                logger.info("File " + file.getName() + " successfully decrypted!");
            } catch (CryptographyException e) {
                logger.info("Error decrypting file " + file.getName());
                // Metadata of an undecrypted document is not usable; the finally block still
                // closes the document.
                return creationDateMetaData;
            }
        }

        PDDocumentInformation info = document.getDocumentInformation();
        Calendar calendar = info.getCreationDate();

        if (calendar == null) {
            // No creation date: some files carry a custom date instead.
            String customDate = info.getCustomMetadataValue("customdate");
            if (customDate != null) {
                try {
                    Date parsed = new SimpleDateFormat("MM/dd/yyyy").parse(customDate);
                    calendar = Calendar.getInstance();
                    calendar.setTime(parsed);
                } catch (ParseException e) {
                    logger.info("Error parsing date from custom date");
                }
            }
        }

        if (calendar != null) {
            // Calendar.MONTH is zero-based, hence the + 1.
            creationDateMetaData = calendar.get(Calendar.YEAR) + "-" + (calendar.get(Calendar.MONTH) + 1) + "-"
                    + calendar.get(Calendar.DATE);
        }
    } catch (IOException e) {
        // The file could not be read; it may be damaged.
        logger.info("Error processing file " + file.getName());
    } finally {
        // Close whatever was opened, regardless of how processing ended.
        if (document != null) {
            try {
                document.close();
                logger.info("File " + file.getName() + " is closed successfully!");
            } catch (IOException e) {
                logger.info("Error closing file " + file.getName());
            }
        }
    }
    return creationDateMetaData;
}

From source file:com.tekstosense.segmenter.Main.java

License:Open Source License

/**
 * Loads the given PDF and runs a {@code TextExtractor} over it.
 *
 * @param f the PDF file to parse
 * @return the extractor after it has processed the whole document
 * @throws IOException if the file cannot be loaded or the text cannot be extracted
 */
private TextExtractor parsePdf(File f) throws IOException {
    PDDocument doc = PDDocument.load(f);
    boolean extracted = false;

    try {
        if (doc.isEncrypted()) {
            // No password is supplied here: the explicit decrypt(password) call of the original
            // code is disabled, so only the "remove security on save" flag is (re)set to false.
            // NOTE(review): presumably load() already handled the empty password - confirm.
            try {
                doc.setAllSecurityToBeRemoved(false);
            } catch (Exception e) {
                throw new IOException("Can't decrypt document: ", e);
            }
        }
        TextExtractor te = new TextExtractor();
        // The written text itself is discarded; only the state collected by the extractor is used.
        te.writeText(doc, new OutputStreamWriter(new ByteArrayOutputStream()));
        extracted = true;

        return te;
    } finally {
        // On failure nobody can use the document any more, so release it here.
        // NOTE(review): on success the document is left open as before, because the returned
        // extractor may still depend on it - confirm, and close it at the caller if it does not.
        if (!extracted) {
            try {
                doc.close();
            } catch (IOException ignored) {
                // Keep the original failure; a secondary close error would only mask it.
            }
        }
    }
}

From source file:com.tekstosense.segmenter.StructurePdf.PdfSections.java

License:Open Source License

/**
 * Opens the given PDF file and feeds it through a new {@code TextExtractor}.
 *
 * @param f the PDF file to read
 * @return the extractor once it has consumed the document
 * @throws IOException if loading the file or extracting its text fails
 */
private TextExtractor parsePdf(File f) throws IOException {
    PDDocument doc = PDDocument.load(f);
    boolean succeeded = false;

    try {
        if (doc.isEncrypted()) {
            // The original decrypt(password) call is commented out, so no password is applied
            // in this method; the call below only sets the "remove security on save" flag to
            // false.
            // NOTE(review): presumably load() already coped with the empty password - confirm.
            try {
                doc.setAllSecurityToBeRemoved(false);
            } catch (Exception e) {
                throw new IOException("Can't decrypt document: ", e);
            }
        }
        TextExtractor te = new TextExtractor();
        // The writer is a throw-away sink: the result lives in the extractor, not in the output.
        te.writeText(doc, new OutputStreamWriter(new ByteArrayOutputStream()));
        succeeded = true;

        return te;
    } finally {
        // Release the document when extraction failed; otherwise it would never be closed.
        // NOTE(review): after a successful run the document stays open, as in the original,
        // since the returned extractor might still reference it - confirm.
        if (!succeeded) {
            try {
                doc.close();
            } catch (IOException ignored) {
                // The extraction failure is the error worth reporting, not the close failure.
            }
        }
    }
}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Lets the user pick a PDF and appends it to the table model: file name, page count, default
 * page range ("1" or "1 - n") and, in the fourth column, either the password that was needed to
 * open the file or the marker "ok" for an unencrypted document.
 *
 * <p>If the file cannot be opened without a password the user is asked for one. Cancelling that
 * prompt leaves the table unchanged.
 *
 * @param evt the event that triggered the action; its source is used as the dialog parent
 */
private void AddButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_AddButtonActionPerformed
    int returnVal = jFileChooser1.showOpenDialog((Component) evt.getSource());
    if (returnVal != JFileChooser.APPROVE_OPTION) {
        System.out.println("File access cancelled by user.");
        return;
    }

    File file = jFileChooser1.getSelectedFile();
    String fileName = file.toString();
    PDDocument doc = null;
    String code = "";
    try {
        doc = PDDocument.load(file);
        if (doc.isEncrypted()) {
            doc.setAllSecurityToBeRemoved(true);
        }
    } catch (IOException ignored) {
        // Deliberately ignored: a failed load is treated as "password required" and leads to
        // the prompt below.
    }

    if (doc == null) {
        JFrame frame = new JFrame("Input Dialog Example 3");
        try {
            code = JOptionPane.showInputDialog(frame, "Enter password", "PDF is encrypted",
                    JOptionPane.WARNING_MESSAGE);
        } finally {
            // The frame only serves as dialog parent; dispose it so it does not linger.
            frame.dispose();
        }
        if (code == null) {
            // The prompt was cancelled: there is no password to try.
            return;
        }
        try {
            doc = PDDocument.load(file, code);
        } catch (IOException ex) {
            JOptionPane.showMessageDialog(null, "Wrong Password.", "Wrong Password",
                    JOptionPane.WARNING_MESSAGE);
        }
    }

    if (doc != null) {
        try {
            int count = doc.getNumberOfPages();
            String currentpages = (count > 1) ? "1 - " + count : "1";
            // "ok" marks a document that needs no password; otherwise the password is stored.
            String column4 = doc.isEncrypted() ? code : "ok";
            dtm.addRow(new Object[] { fileName, count, currentpages, column4 });
        } finally {
            // The document was opened only to inspect it; it is re-loaded when needed.
            try {
                doc.close();
            } catch (IOException ex) {
                JOptionPane.showMessageDialog(null, "Problem accessing file.", "Problem accessing file",
                        JOptionPane.WARNING_MESSAGE);
            }
        }

        arr.add(file);
    }

}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Merges the selected pages of every document listed in the table model into one new PDF and
 * saves it as {@code <directory field>/<name field>.pdf}, with the author taken from the author
 * field.
 *
 * <p>For each table row, column 0 holds the file path, column 2 the page selection (parsed by
 * {@code findPages}) and column 3 either the password or the marker "ok" for "no password".
 *
 * @param evt the event that triggered the action (unused)
 */
private void MergeButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_MergeButtonActionPerformed
    PDDocument samplePdf = null;
    ArrayList<PDDocument> list = new ArrayList<>();
    try {
        samplePdf = new PDDocument();
        for (int i = 0; i < dtm.getRowCount(); i++) {
            File file = new File((String) dtm.getValueAt(i, 0));
            String code = (String) dtm.getValueAt(i, 3);
            PDDocument doc1;
            // Constant-first comparison: a missing password cell must not raise an NPE.
            if ("ok".equals(code)) {
                doc1 = PDDocument.load(file);
            } else {
                doc1 = PDDocument.load(file, code);
            }
            list.add(doc1);
            doc1.setAllSecurityToBeRemoved(true);
            TreeSet<?> tree = findPages((String) dtm.getValueAt(i, 2));
            for (int j = 0; j < doc1.getNumberOfPages(); j++) {
                // Page numbers in the selection are 1-based.
                if (tree.contains(j + 1)) {
                    samplePdf.addPage(doc1.getPage(j));
                }
            }
        }
        System.out.println("Number:" + samplePdf.getNumberOfPages());

        // File.separator instead of a hard-coded backslash keeps the path valid on every platform.
        String destination = jTextField1.getText() + File.separator + jTextField2.getText() + ".pdf";
        PDDocumentInformation info = samplePdf.getDocumentInformation();
        info.setAuthor(jTextField3.getText());
        File output = new File(destination);

        samplePdf.save(output);
    } catch (IOException ex) {

        JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.",
                "Input warning", JOptionPane.WARNING_MESSAGE);
    } finally {
        // The merged document shares page objects with its sources, so the sources are closed
        // only here, after the save - and also when anything above failed.
        if (samplePdf != null) {
            try {
                samplePdf.close();
            } catch (IOException ex) {
                Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        for (PDDocument source : list) {
            try {
                source.close();
            } catch (IOException ex) {
                Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Copies the document named in the rotate file field into a new PDF, applying the rotation
 * chosen through the 90/180/270 radio buttons to the pages listed in the pages field, and saves
 * it as {@code <destination field>/<name field>.pdf} with the author from the author field.
 *
 * <p>{@code rotatecode} holds the password of the source document, or "ok" for "no password".
 *
 * @param evt the event that triggered the action (unused)
 */
private void RotateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButtonActionPerformed
    PDDocument samplePdf = null;
    PDDocument doc1 = null;
    try {
        samplePdf = new PDDocument();
        File file = new File(RotateFileField.getText());
        // Constant-first comparison: an unset password must not raise an NPE.
        if (rotatecode == null || "ok".equals(rotatecode)) {
            doc1 = PDDocument.load(file);
        } else {
            doc1 = PDDocument.load(file, rotatecode);
        }
        doc1.setAllSecurityToBeRemoved(true);
        TreeSet<?> tree = findPages(RotatePagesField.getText());

        // The selected angle does not change while pages are processed; -1 means "none selected".
        final int rotation;
        if (Rotate90.isSelected()) {
            rotation = 90;
        } else if (Rotate180.isSelected()) {
            rotation = 180;
        } else if (Rotate270.isSelected()) {
            rotation = 270;
        } else {
            rotation = -1;
        }

        for (int j = 0; j < doc1.getNumberOfPages(); j++) {
            PDPage page = doc1.getPage(j);

            // Page numbers in the selection are 1-based.
            if (tree.contains(j + 1)) {
                if (rotation < 0) {
                    // NOTE(review): as in the original, a selected page is left out of the result
                    // when no angle is chosen - confirm that this is intended.
                    continue;
                }
                page.setRotation(rotation);
            }
            samplePdf.addPage(page);
        }

        System.out.println("Number:" + samplePdf.getNumberOfPages());

        // File.separator instead of a hard-coded backslash keeps the path valid on every platform.
        String destination = RotateDestinationField.getText() + File.separator + RotateNameField.getText()
                + ".pdf";
        PDDocumentInformation info = samplePdf.getDocumentInformation();
        info.setAuthor(RotateAuthorField.getText());
        File output = new File(destination);

        samplePdf.save(output);
    } catch (IOException ex) {
        Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);

        JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.",
                "Input warning", JOptionPane.WARNING_MESSAGE);
    } finally {
        // The new document shares page objects with the source, so the source is closed only
        // here, after the save - and also when anything above failed.
        if (samplePdf != null) {
            try {
                samplePdf.close();
            } catch (IOException ex) {
                Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        if (doc1 != null) {
            try {
                doc1.close();
            } catch (IOException ex) {
                Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Lets the user pick the PDF to rotate. On success the form is pre-filled: the pages field gets
 * the full page range ("1" or "1 - n"), the file field the chosen path, the name field the file
 * name without extension plus "Rotated", and {@code rotatecode} the password that was needed to
 * open the file (empty when none was needed).
 *
 * <p>If the file cannot be opened without a password the user is asked for one. When the prompt
 * is cancelled or the password does not work, neither the form nor {@code rotatecode} is
 * changed, so they keep describing the previously chosen file.
 *
 * @param evt the event that triggered the action; its source is used as the dialog parent
 */
private void RotateFileButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateFileButtonActionPerformed
    int returnVal = jFileChooser1.showOpenDialog((Component) evt.getSource());
    if (returnVal != JFileChooser.APPROVE_OPTION) {
        System.out.println("File access cancelled by user.");
        return;
    }

    File file = jFileChooser1.getSelectedFile();
    String fileName = file.toString();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(file);
        if (doc.isEncrypted()) {
            doc.setAllSecurityToBeRemoved(true);
        }
    } catch (IOException ignored) {
        // Deliberately ignored: a failed load is treated as "password required" and leads to
        // the prompt below.
    }

    // Collected locally and stored in rotatecode only once the document really opened.
    String enteredCode = "";
    if (doc == null) {
        JFrame frame = new JFrame("Input Dialog Example 3");
        try {
            enteredCode = JOptionPane.showInputDialog(frame, "Enter password", "PDF is encrypted",
                    JOptionPane.WARNING_MESSAGE);
        } finally {
            // The frame only serves as dialog parent; dispose it so it does not linger.
            frame.dispose();
        }
        if (enteredCode == null) {
            // The prompt was cancelled: there is no password to try, and rotatecode must not
            // become null because the rotate action compares against it.
            return;
        }
        try {
            doc = PDDocument.load(file, enteredCode);
        } catch (IOException ex) {
            JOptionPane.showMessageDialog(null, "Wrong Password.", "Wrong Password",
                    JOptionPane.WARNING_MESSAGE);
        }
    }

    if (doc != null) {
        try {
            rotatecode = enteredCode;
            int count = doc.getNumberOfPages();
            String currentpages = (count > 1) ? "1 - " + count : "1";
            RotatePagesField.setText(currentpages);
            RotateFileField.setText(fileName);

            // Suggested output name: the file name without its extension, plus "Rotated".
            String name = file.getName();
            int pos = name.lastIndexOf(".");
            if (pos > 0) {
                name = name.substring(0, pos);
            }
            name = name + "Rotated";
            RotateNameField.setText(name);
        } finally {
            // The document was opened only to inspect it; it is re-loaded when rotating.
            try {
                doc.close();
            } catch (IOException ex) {
                JOptionPane.showMessageDialog(null, "Problem finishing process.", "Problem finishing process",
                        JOptionPane.WARNING_MESSAGE);
            }
        }
    }
}