Example usage for org.apache.pdfbox.pdmodel PDDocument isEncrypted

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.isEncrypted().

Prototype

public boolean isEncrypted()

Source Link

Document

This will tell if this document is encrypted or not.

Usage

From source file:org.seasar.robot.extractor.impl.PdfExtractor.java

License:Apache License

/**
 * Extracts the text of a PDF read from the given stream.
 *
 * <p>The whole extraction runs while holding {@code pdfBoxLockObj}. If the
 * document is encrypted and {@code params} is non-null, a password is taken
 * from {@code params} (key {@code ExtractData.PDF_PASSWORD}) or, failing
 * that, from {@code getPassword(...)}; when a password is available the
 * document is opened with it and the extract-content permission is checked.
 * Text stripping runs on a daemon thread that is joined with {@code timeout};
 * if it has not finished, the thread is interrupted repeatedly (up to 100
 * times, 50 ms apart) and an {@link ExtractException} is thrown.
 *
 * @param in     the PDF content; must not be {@code null}
 * @param params optional extraction parameters; may be {@code null}
 * @return the extracted text together with the metadata added by
 *         {@code extractMetadata}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if loading, decrypting or stripping fails, if
 *                              the worker does not finish in time, or if the
 *                              calling thread is interrupted while waiting
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    synchronized (pdfBoxLockObj) {
        PDDocument document = null;
        try {
            document = PDDocument.load(in, null, force);
            if (document.isEncrypted() && params != null) {
                String password = params.get(ExtractData.PDF_PASSWORD);
                if (password == null) {
                    password = getPassword(params.get(ExtractData.URL),
                            params.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
                }
                if (password != null) {
                    final StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password);
                    document.openProtection(sdm);
                    final AccessPermission ap = document.getCurrentAccessPermission();

                    if (!ap.canExtractContent()) {
                        throw new IOException("You do not have permission to extract text.");
                    }
                }
            }

            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final Writer output = new OutputStreamWriter(baos, encoding);
            final PDFTextStripper stripper = new PDFTextStripper(encoding);
            stripper.setForceParsing(force);
            // 'done' is set by the worker in its finally block, so it becomes true
            // whether stripping succeeded or failed.
            final AtomicBoolean done = new AtomicBoolean(false);
            final PDDocument doc = document;
            // Carries a failure from the worker thread back to this thread.
            final Set<Exception> exceptionSet = new HashSet<>();
            Thread task = new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        stripper.writeText(doc, output);
                    } catch (Exception e) {
                        exceptionSet.add(e);
                    } finally {
                        done.set(true);
                    }
                }
            });
            task.setDaemon(true);
            task.start();
            // NOTE(review): Thread.join(long) takes milliseconds, while the message
            // below reports the same value as "sec." - confirm the unit of 'timeout'.
            task.join(timeout);
            if (!done.get()) {
                // Keep interrupting until the worker notices or we give up (100 x 50 ms).
                for (int i = 0; i < 100 && !done.get(); i++) {
                    task.interrupt();
                    Thread.sleep(50);
                }
                throw new ExtractException("PDFBox process cannot finish in " + timeout + " sec.");
            } else if (!exceptionSet.isEmpty()) {
                throw exceptionSet.iterator().next();
            }
            output.flush();
            final ExtractData extractData = new ExtractData(baos.toString(encoding));
            extractMetadata(document, extractData);
            return extractData;
        } catch (final ExtractException e) {
            // Already the type callers expect (e.g. the timeout above); do not
            // bury its message inside a second ExtractException.
            throw e;
        } catch (final InterruptedException e) {
            // Preserve the interrupt status for code further up the stack.
            Thread.currentThread().interrupt();
            throw new ExtractException(e);
        } catch (final Exception e) {
            throw new ExtractException(e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (final IOException e) {
                    // Best effort: a failure to close must not mask the real outcome.
                }
            }
        }
    }
}

From source file:org.swiftexplorer.gui.preview.PdfPanel.java

License:Apache License

/**
 * Replaces the previewed pages with images rendered from the given document
 * and closes the document afterwards.
 *
 * <p>The current page images are always cleared first. A {@code null} or
 * encrypted document leaves the list empty and returns without calling
 * {@code repaint()}. Otherwise pages are rendered in order until the list
 * holds at least {@code maxPageToPreview} images.
 *
 * @param pdf the document to preview; may be {@code null}
 */
public synchronized void setPdf(PDDocument pdf) {
    listImagePages.clear();
    if (pdf == null) {
        return;
    }
    try {
        if (pdf.isEncrypted()) {
            logger.info("Failed attempt at previewing an encrypted PDF");
            return;
        }
        @SuppressWarnings("unchecked")
        final List<PDPage> allPages = pdf.getDocumentCatalog().getAllPages();
        if (allPages != null) {
            for (final PDPage current : allPages) {
                listImagePages.add(current.convertToImage());
                // The limit is checked after adding, so the first page is always rendered.
                if (listImagePages.size() >= maxPageToPreview) {
                    break;
                }
            }
        }
    } catch (IOException e) {
        logger.error("Error occurred while opening the pdf document", e);
    } finally {
        // pdf is known to be non-null here; the document is closed on every path.
        try {
            pdf.close();
        } catch (IOException ex) {
            logger.error("Error occurred while closing the pdf document", ex);
        }
    }
    repaint();
}

From source file:org.terrier.indexing.PDFDocument.java

License:Mozilla Public License

/**
 * Returns a reader over the text extracted from the PDF in the given stream,
 * suitable for parsing terms out of.
 *
 * <p>If {@code Files.length(filename)} exceeds 300 MB the document is skipped
 * and an empty reader is returned. An encrypted document is decrypted with the
 * empty password. In the extracted text, each of the characters U+00A0, U+2029
 * and '#' is replaced by a space when it occurs more often than half the
 * current number of spaces. The "title" property is set from the PDF document
 * information when available and {@code USE_PDF_TITLE} is set, otherwise from
 * the file name.
 *
 * @param is the input stream that represents the document's file.
 * @return a reader over the extracted text, to be fed to an indexer.
 * @throws RuntimeException if the document cannot be decrypted or its text
 *         cannot be extracted; the underlying exception is attached as cause.
 */
protected Reader getReader(InputStream is) {

    if ((Files.length(filename) / 1048576) > 300) {
        logger.info("Skipping document " + filename + " because it's size exceeds 300Mb");
        return new StringReader("");
    }

    PDDocument pdfDocument = null;
    Reader rtr = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        int spaceCount = StringUtils.countMatches(contents, " ");
        // Characters that dominate the text compared to real spaces are treated
        // as mis-extracted spaces and replaced.
        for (char badChar : new char[] { '\u00A0', '\u2029', '#' }) {
            final int count = StringUtils.countMatches(contents, "" + badChar);
            if (count > spaceCount / 2) {
                contents = contents.replace(badChar, ' ');
                spaceCount += count;
            }
        }
        rtr = new StringReader(contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null && USE_PDF_TITLE) {
            setProperty("title", info.getTitle());
        } else {
            setProperty("title", new java.io.File(super.filename).getName());
        }
    } catch (CryptographyException e) {
        // Keep the original exception as cause so the stack trace is not lost.
        throw new RuntimeException("Error decrypting PDF document: " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new RuntimeException("Error: The PDF document is encrypted and will not be indexed.", e);
    } catch (Exception e) {
        throw new RuntimeException("Error extracting PDF document", e);
    } finally {
        if (pdfDocument != null) {
            try {
                pdfDocument.close();
            } catch (IOException ignored) {
                // Best effort: a failure to close must not mask the extraction result.
            }
        }
    }
    return rtr;
}

From source file:org.vesalainen.ham.pdf.RfaxTest.java

License:Open Source License

/**
 * Loads {@code rfax.pdf} and, unless it is encrypted, writes its extracted
 * text to {@code src/main/resources/rfax.txt}.
 *
 * @throws IOException if the PDF cannot be read or the text file cannot be written
 */
public void test() throws IOException {
    PDDocument document = PDDocument.load(new File("rfax.pdf"));
    try {
        if (!document.isEncrypted()) {
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(document);
            try (BufferedWriter bw = Files.newBufferedWriter(Paths.get("src", "main", "resources", "rfax.txt"))) {
                bw.write(text);
            }
        }
    } finally {
        // Close the document even when extraction or writing fails.
        document.close();
    }
}

From source file:org.xstudiosys.pdfxmp.AddMetadataFromDocInfo.java

License:Apache License

/**
 * Builds XMP metadata from a PDF's document information and saves the result
 * as a new file.
 *
 * <p>Expects exactly two arguments: the PDF to load and the path to save to.
 * With any other argument count {@code usage()} is called. An encrypted
 * input is rejected with an error message and exit status 1.
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing or saving the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
        return;
    }

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(args[0]);
        if (pdf.isEncrypted()) {
            System.err.println("Error: Cannot add metadata to encrypted document.");
            System.exit(1);
        }
        final PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
        final PDDocumentInformation docInfo = pdf.getDocumentInformation();

        final XMPMetadata xmp = new XMPMetadata();

        // PDF schema: keywords and producer.
        final XMPSchemaPDF pdfPart = xmp.addPDFSchema();
        pdfPart.setKeywords(docInfo.getKeywords());
        pdfPart.setProducer(docInfo.getProducer());

        // Basic schema: dates and creator tool; the metadata date is "now".
        final XMPSchemaBasic basicPart = xmp.addBasicSchema();
        basicPart.setModifyDate(docInfo.getModificationDate());
        basicPart.setCreateDate(docInfo.getCreationDate());
        basicPart.setCreatorTool(docInfo.getCreator());
        basicPart.setMetadataDate(new GregorianCalendar());

        // Dublin Core schema: title, creator and description.
        final XMPSchemaDublinCore dublinCorePart = xmp.addDublinCoreSchema();
        dublinCorePart.setTitle(docInfo.getTitle());
        dublinCorePart.addCreator("PDFBox");
        dublinCorePart.setDescription(docInfo.getSubject());

        // Attach the XMP packet to the catalog and write the document out.
        final PDMetadata xmpStream = new PDMetadata(pdf);
        xmpStream.importXMPMetadata(xmp);
        docCatalog.setMetadata(xmpStream);

        pdf.save(args[1]);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Tries to read BibTeX entries from the metadata of the PDF contained in the
 * given input stream.
 *
 * <p>Sources are consulted in this order, each only if the previous ones
 * produced nothing: BibTeX XMP schemas, Dublin Core XMP schemas, and finally
 * the PDF document information.
 *
 * @param inputStream
 *            The inputstream to read from.
 * @return the entries found, or {@code null} if no metadata was found.
 *
 * @throws IOException
 *             Throws an IOException if the file cannot be read, so the user
 *             can remove a lock or cancel the operation.
 */
@SuppressWarnings("unchecked")
public static List<BibtexEntry> readXMP(InputStream inputStream) throws IOException {

    final List<BibtexEntry> entries = new LinkedList<BibtexEntry>();

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(inputStream);
        if (pdf.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
        }

        final XMPMetadata xmp = getXMPMetadata(pdf);
        if (xmp != null) {
            // First choice: dedicated BibTeX schemas.
            final List<XMPSchema> bibtexSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema candidate : bibtexSchemas) {
                entries.add(((XMPSchemaBibtex) candidate).getBibtexEntry());
            }

            // Second choice: derive entries from Dublin Core schemas.
            if (entries.isEmpty()) {
                final List<XMPSchema> dublinCoreSchemas = xmp
                        .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema candidate : dublinCoreSchemas) {
                    final BibtexEntry fromDublinCore = getBibtexEntryFromDublinCore((XMPSchemaDublinCore) candidate);
                    if (fromDublinCore != null) {
                        entries.add(fromDublinCore);
                    }
                }
            }
        }

        // Last resort: the non-XMP document information.
        if (entries.isEmpty()) {
            final BibtexEntry fromDocInfo = getBibtexEntryFromDocumentInformation(pdf.getDocumentInformation());
            if (fromDocInfo != null) {
                entries.add(fromDocInfo);
            }
        }
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }

    // Callers receive null, not an empty list, when nothing was found.
    return entries.isEmpty() ? null : entries;
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Will read the XMPMetadata from the given pdf file, closing the file
 * afterwards./*from   www .j  av a  2 s  . c om*/
 * 
 * @param inputStream
 *            The inputStream representing a PDF-file to read the
 *            XMPMetadata from.
 * @return The XMPMetadata object found in the file or null if none is
 *         found.
 * @throws IOException
 */
public static XMPMetadata readRawXMP(InputStream inputStream) throws IOException {
    PDDocument document = null;

    try {
        document = PDDocument.load(inputStream);
        if (document.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
        }

        return getXMPMetadata(document);

    } finally {
        if (document != null)
            document.close();
    }
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Try to write the given BibTexEntry in the XMP-stream of the given
 * PDF-file./*from ww  w  .  j av a2s.  c  o m*/
 * 
 * Throws an IOException if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 * 
 * The method will overwrite existing BibTeX-XMP-data, but keep other
 * existing metadata.
 * 
 * @param file
 *            The file to write the entries to.
 * @param bibtexEntries
 *            The entries to write to the file. *
 * @param database
 *            maybenull An optional database which the given bibtex entries
 *            belong to, which will be used to resolve strings. If the
 *            database is null the strings will not be resolved.
 * @param writePDFInfo
 *            Write information also in PDF document properties
 * @throws TransformerException
 *             If the entry was malformed or unsupported.
 * @throws IOException
 *             If the file could not be written to or could not be found.
 */
@SuppressWarnings("unchecked")
public static void writeXMP(File file, Collection<BibtexEntry> bibtexEntries, BibtexDatabase database,
        boolean writePDFInfo) throws IOException, TransformerException {

    if (database != null)
        bibtexEntries = database.resolveForStrings(bibtexEntries, false);

    PDDocument document = null;

    try {
        document = PDDocument.load(file.getAbsoluteFile());
        if (document.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document.");
        }

        if (writePDFInfo && bibtexEntries.size() == 1) {
            writeDocumentInformation(document, bibtexEntries.iterator().next(), null);
            writeDublinCore(document, bibtexEntries, null);
        }

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        XMPMetadata meta;
        if (metaRaw != null) {
            meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        } else {
            meta = new XMPMetadata();
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);

        // Remove all current Bibtex-schemas
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        for (XMPSchema schema : schemas) {
            XMPSchemaBibtex bib = (XMPSchemaBibtex) schema;
            bib.getElement().getParentNode().removeChild(bib.getElement());
        }

        for (BibtexEntry e : bibtexEntries) {
            XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
            meta.addSchema(bibtex);
            bibtex.setBibtexEntry(e, null);
        }

        // Save to stream and then input that stream to the PDF
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        meta.save(os);
        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        PDMetadata metadataStream = new PDMetadata(document, is, false);
        catalog.setMetadata(metadataStream);

        // Save
        try {
            document.save(file.getAbsolutePath());
        } catch (COSVisitorException e) {
            throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage());
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:pdfedittest.PDFEditTest.java

/**
 * Extracts the text of a hard-coded PDF into {@code rayani.txt} and then
 * hands that text file to {@code TextParser}.
 *
 * <p>The page count and encryption flag of the PDF are printed first. Any
 * failure is reported via a stack trace.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        String outputPath = "rayani.txt";
        File input = new File("C:\\Users\\Administrator\\Desktop\\FA Feb 16.pdf"); // The PDF file from where you would like to extract
        File output = new File(outputPath); // The text file where you are going to store the extracted data

        PDDocument pd = PDDocument.load(input);
        try {
            System.out.println(pd.getNumberOfPages());
            System.out.println(pd.isEncrypted());

            PDFTextStripper stripper = new PDFTextStripper();
            BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)));
            try {
                stripper.writeText(pd, wr);
            } finally {
                // Closing also flushes, so the text is on disk before it is parsed below.
                wr.close();
            }
        } finally {
            // Release the document even when extraction fails.
            pd.close();
        }

        TextParser a = new TextParser(outputPath);
        a.getUserDetail("");
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:pdfpositional.PdfPositional.java

/**
 * Command line entry point: processes the content streams of a PDF with
 * {@code PdfPositional} and writes the result as JSON.
 *
 * <p>The last argument is the PDF file; it must match the file-name pattern,
 * exist and not be a directory. Preceding arguments of the form
 * {@code --page=N} and {@code --output=FILE} select a single page and the
 * output file. An encrypted document is decrypted with the empty password.
 * The process exits with status 0 on success and 1 on any reported error.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        // The file parameter is mandatory and always comes last.
        if (args.length == 0) {
            throw new ParameterException("No file parameter specified");
        }

        final int fileIndex = args.length - 1;
        String file = args[fileIndex];
        Pattern fileNamePattern = Pattern.compile("(?i)^[\\w,\\s-()/]+\\.pdf$");

        // Reject anything that does not look like a PDF file name.
        if (!fileNamePattern.matcher(file).find()) {
            throw new ParameterException("File parameter invalid: " + file);
        }

        File input = new File(file);
        if (!input.exists()) {
            throw new ParameterException("File does not exist: " + file);
        }
        if (input.isDirectory()) {
            throw new ParameterException("File is a directory: " + file);
        }

        PdfPositional pdfPositional = new PdfPositional(input);
        pdfPositional.setConversion(new Float(1.388888888889));

        pdfPositional.processFileArgument(file);
        Pattern optionPattern = Pattern.compile("^-{2}([^=]+)[=]([\\s\\S]+)$");

        // Every argument before the file may be a --name=value option.
        for (int argIndex = 0; argIndex < fileIndex; argIndex++) {
            Matcher option = optionPattern.matcher(args[argIndex]);
            while (option.find()) {
                String optionName = option.group(1);
                if ("page".equals(optionName)) {
                    pdfPositional.setPageNumber(Integer.parseInt(option.group(2)));
                } else if ("output".equals(optionName)) {
                    pdfPositional.setOutputFile(option.group(2));
                }
            }
        }

        PDDocument document = PDDocument.load(pdfPositional.getInputFile());

        // Encrypted documents are only supported with the empty password.
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (CryptographyException | IOException e) {
                document.close();
                throw new EncryptedDocumentException();
            }
        }

        List allPages = document.getDocumentCatalog().getAllPages();
        if (!pdfPositional.hasPageNumber()) {
            // No page selected: process every page, numbering from 1.
            int pageNumber = 0;
            for (Object element : allPages) {
                pageNumber++;
                pdfPositional.setPageNumber(pageNumber);
                PDPage page = (PDPage) element;
                PDStream contents = page.getContents();

                if (contents != null) {
                    pdfPositional.processStream(page, page.findResources(), page.getContents().getStream());
                    pdfPositional.addPageDataToPdfData();
                    pdfPositional.writeJSONToOutputStream();
                }

                page.clear();
            }
        } else {
            // A single page was requested; it must lie within the document.
            if (document.getNumberOfPages() < pdfPositional.getPageNumber()) {
                throw new ParameterException("illegal page number");
            }
            PDPage page = (PDPage) allPages.get(pdfPositional.getPageNumber() - 1);
            PDStream contents = page.getContents();
            if (contents != null) {
                pdfPositional.processStream(page, page.findResources(), page.getContents().getStream());
                pdfPositional.addPageDataToPdfData();
                pdfPositional.writeJSONToOutputStream();
            }
        }

        pdfPositional.destroyOutputStream();
        document.close();

        System.exit(0);
    } catch (ParameterException ex) {
        System.out.println("Parameter Error: " + ex.getMessage());
        System.exit(1);
    } catch (EncryptedDocumentException ex) {
        System.out.println("Encrypted Document Error");
        System.exit(1);
    } catch (IOException | NumberFormatException ex) {
        System.out.println("General Error");
        System.exit(1);
    }

}