Example usage for org.apache.pdfbox.pdmodel PDDocument isEncrypted

List of usage examples for org.apache.pdfbox.pdmodel PDDocument isEncrypted

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.isEncrypted().

Prototype

public boolean isEncrypted() 

Source Link

Document

This will tell if this document is encrypted or not.

Usage

From source file:pdfsplicer.SplicerModel.java

License:Open Source License

/**
 * Create the new PDF, and save it.
 *
 * <p>For each index {@code i} of {@code pageEntryPDFList}, the source document is looked up
 * in {@code pdfList} and every page number in {@code pageRangeList.get(i)} is cloned into
 * the new document. Page numbers are decremented by one before being passed to
 * {@code getPage}. The new document is always closed, even if cloning or saving fails.
 *
 * <p>If a source document reports itself as encrypted, an error is printed and the JVM is
 * terminated with exit status 1.
 *
 * @param saveFile the file to save it as
 * @throws IOException if it cannot save the file
 */
public void makeFinalizedPDF(File saveFile) throws IOException {

    PDDocument newdoc = new PDDocument();
    try {
        for (int i = 0; i < pageEntryPDFList.size(); ++i) {
            PDDocument doc = pdfList.get(pageEntryPDFList.get(i));

            if (doc.isEncrypted()) {
                System.out.println("Error: Encrypted PDF");
                System.exit(1);
            }

            List<Integer> pRange = pageRangeList.get(i);
            // One cloner per source entry, all targeting the same destination document.
            PDFCloneUtility pdfCloner = new PDFCloneUtility(newdoc);
            for (int pNum : pRange) {
                PDPage page = doc.getPage(pNum - 1);
                COSDictionary clonedDict = (COSDictionary) pdfCloner.cloneForNewDocument(page);
                newdoc.addPage(new PDPage(clonedDict));
            }
        }

        newdoc.save(saveFile);
    } finally {
        // Release the new document's resources on both the success and the failure path.
        newdoc.close();
    }
}

From source file:pl.umk.mat.zawodyweb.pdf.PdfToImage.java

License:Open Source License

/**
 * Renders a PDF into a single image.
 *
 * <p>A one-page document yields the rendering of that page. For several pages, the
 * renderings are stacked vertically on a canvas whose width and per-page height are taken
 * from the first page, with a red line drawn at the top edge of every page after the first.
 * If the document is encrypted, decryption with an empty password is attempted first.
 *
 * @param pdfFile stream to read the PDF from
 * @return the rendered image, or {@code null} if the document has no pages
 * @throws RuntimeException if loading, decrypting, rendering or closing the document fails
 */
public static BufferedImage process(InputStream pdfFile) {
    final int imageType = BufferedImage.TYPE_USHORT_565_RGB;
    final int resolution = 72 * 2;

    BufferedImage output = null;
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(pdfFile, true);
        if (pdf.isEncrypted()) {
            pdf.decrypt("");
        }

        List<PDPage> pdfPages = pdf.getDocumentCatalog().getAllPages();
        int pageCount = pdfPages.size();
        if (pageCount > 0) {
            BufferedImage firstRendering = pdfPages.get(0).convertToImage(imageType, resolution);

            if (pageCount == 1) {
                output = firstRendering;
            } else {
                // Every page is assumed to fit the slot size of the first page.
                int width = firstRendering.getWidth();
                int height = firstRendering.getHeight();

                output = new BufferedImage(width, height * pageCount, imageType);

                Graphics2D g = output.createGraphics();
                g.drawImage(firstRendering, 0, 0, null);
                g.setColor(Color.red);

                for (int pageNo = 1; pageNo < pageCount; pageNo++) {
                    BufferedImage rendering = pdfPages.get(pageNo).convertToImage(imageType, resolution);
                    int top = pageNo * height;

                    g.drawImage(rendering, 0, top, null);
                    // Separator between this page and the one above it.
                    g.drawLine(0, top, width, top);
                }
                g.dispose();
            }
        }
    } catch (Exception ex) {
        throw new RuntimeException("Exception converting pdf to image: ", ex);
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException ex) {
                throw new RuntimeException("Exception when closing pdf: ", ex);
            }
        }
    }
    return output;
}

From source file:Project.data.preparation.ImageExtraction.java

/**
 * Writes every image XObject found in the PDF to {@code destinationDir} and collects, per
 * page, the image names and sizes used by the figure-name detection step.
 *
 * <p>Image files are named after {@code oldFile} with {@code ".pdf"} replaced by
 * {@code "_cover"}, followed by {@code "_"} and a counter that runs across all pages.
 * XObjects that are not images (for example form XObjects) are skipped, as are pages
 * without a resource dictionary. The document is closed even when extraction fails.
 *
 * @param sourceDir path the PDF is loaded from
 * @param destinationDir prefix prepended to each image name when writing it out
 * @throws IOException if the document cannot be read or an image cannot be written
 * @throws CryptographyException if decrypting with the empty password fails
 * @throws COSVisitorException declared for callers; propagated from the helper calls
 */
public void extractImages(String sourceDir, String destinationDir)
        throws IOException, CryptographyException, COSVisitorException {
    // NOTE(review): existence is checked on the oldFile field while the document is loaded
    // from sourceDir; confirm both refer to the same file.
    if (!oldFile.exists()) {
        System.err.println("File not exists");
        return;
    }

    PDDocument document = PDDocument.load(sourceDir);
    try {
        if (document.isEncrypted()) {
            document.decrypt("");
        }
        List<PDPage> list = document.getDocumentCatalog().getAllPages();

        String fileName_img = oldFile.getName().replace(".pdf", "_cover");
        int pageNum = 0;
        int totalImages = 1;
        System.out.println("\n" + filename);

        for (PDPage page : list) {
            // Per-page state is reset before each page is inspected.
            original_imgName = new ArrayList<String>();
            location_xy = new ArrayList<double[]>();
            size_xy_ordered = new ArrayList<double[]>();
            size_xy_tmp = new ArrayList<double[]>();
            pageNum++;

            PDResources pdResources = page.getResources();
            Map pageImages = (pdResources == null) ? null : pdResources.getXObjects();
            if (pageImages == null || pageImages.size() == 0) {
                continue;
            }

            Iterator imageIter = pageImages.keySet().iterator();
            while (imageIter.hasNext()) {
                Object xObject = pageImages.get(imageIter.next());
                if (!(xObject instanceof PDXObjectImage)) {
                    // The XObject map is not limited to images; casting blindly would throw.
                    continue;
                }
                PDXObjectImage pdxObjectImage = (PDXObjectImage) xObject;

                String imgName = fileName_img + "_" + totalImages;
                System.out.println("Page Number : " + pageNum + "\t" + imgName);
                pdxObjectImage.write2file(destinationDir + imgName);

                original_imgName.add(imgName + "." + pdxObjectImage.getSuffix());
                size_xy_ordered.add(new double[] { pdxObjectImage.getWidth(), pdxObjectImage.getHeight() });
                totalImages++;
            }

            // Start for detect figure name for image renaming
            PrintImageLocation printer = new PrintImageLocation(page);
            location_xy = printer.getLocation_xy();
            size_xy_tmp = printer.getSize_xy();
            RearrangeImageOrder(location_xy, size_xy_tmp, size_xy_ordered);
            // The instance is not used afterwards; presumably the constructor performs the
            // detection itself. TODO confirm against DetectFigureName.
            new DetectFigureName(original_imgName, filename, pageNum, page, location_ordered, size_xy_ordered);
        }
    } finally {
        document.close();
    }
}

From source file:summarizer.pdfReader.java

/**
 * Extracts the text of a page range from a PDF and splits it into non-empty pieces.
 *
 * <p>The text is first written to {@code SampleText.txt} in the working directory and then
 * read back line by line. With {@code newLine} set, each non-empty line becomes one entry;
 * otherwise the lines are joined with spaces and the result is split on {@code '.'}.
 * Any failure is printed to standard error and the pieces collected so far are returned.
 *
 * @param filePath path of the PDF to read
 * @param startPage first page handed to the text stripper
 * @param endPage last page handed to the text stripper
 * @param newLine {@code true} to split on line breaks, {@code false} to split on periods
 * @return the non-empty pieces; empty if extraction failed before any were collected
 */
List<String> parsePdf(String filePath, int startPage, int endPage, boolean newLine) {
    List<String> outputStrings = new ArrayList<String>();
    PDDocument pd = null;
    try {
        File input = new File(filePath); // The PDF file from where you would like to extract
        File output = new File("SampleText.txt"); // The text file where the extracted data is stored
        pd = PDDocument.load(input);
        System.out.println(pd.getNumberOfPages());
        if (pd.isEncrypted()) {
            // NOTE(review): only reported; extraction is still attempted, as before.
            System.out.println("Error PDF is encrypted, cannot Parse");
        }
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(startPage);
        stripper.setEndPage(endPage);

        BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)));
        try {
            stripper.writeText(pd, wr);
        } finally {
            // Closing also flushes, which must happen before the file is read back.
            wr.close();
        }

        // readLine() strips line terminators, so line mode has to re-insert them;
        // joining with spaces would leave nothing for the newline split to find.
        String separator = newLine ? "\n" : " ";
        StringBuilder sb = new StringBuilder();
        BufferedReader in = new BufferedReader(new FileReader(output));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                sb.append(separator);
                sb.append(line);
            }
        } finally {
            in.close();
        }

        String[] tokenizedStrings = sb.toString().split(newLine ? "\\r?\\n" : "\\.");
        for (String x : tokenizedStrings) {
            if (!x.isEmpty()) {
                outputStrings.add(x);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (pd != null) {
            try {
                pd.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return outputStrings;
}

From source file:uk.bl.dpt.qa.flint.wrappers.PDFBoxWrapper.java

License:Apache License

/**
 * Check if a PDF file has DRM or not.
 *
 * @param pFile file to check
 * @return {@code true} if the loaded document reports itself as encrypted, or if loading
 *         fails with an {@link IOException} whose message mentions
 *         {@code BadSecurityHandlerException}; {@code false} otherwise, including when any
 *         other failure prevents a decision
 */
public boolean hasDRM(File pFile) {
    boolean ret = false;

    File tmp = null;
    FileInputStream input = null;
    PDDocument doc = null;
    try {
        System.setProperty("org.apache.pdfbox.baseParser.pushBackSize", "1024768");
        // NOTE: we use loadNonSeq here as it is the latest parser
        // load() and parser.parse() have hung on test files
        tmp = File.createTempFile("flint-", ".tmp");
        tmp.deleteOnExit();
        // NOTE(review): the scratch file is not closed here; presumably the document
        // releases it on close. TODO confirm against the PDFBox version in use.
        RandomAccess scratchFile = new RandomAccessFile(tmp, "rw");
        input = new FileInputStream(pFile);
        doc = PDDocument.loadNonSeq(input, scratchFile);
        ret = doc.isEncrypted();

    } catch (IOException e) {

        // This may occur when a suitable security handler cannot be found.
        // The message can be null, so guard before inspecting it.
        String message = e.getMessage();
        if (message != null && message.contains("BadSecurityHandlerException")) {
            // if this happens then there must be some sort of DRM here
            ret = true;
        }

    } catch (Exception e) {

        e.printStackTrace();

        // See comments in https://issues.apache.org/jira/browse/PDFBOX-1757
        // PDFBox state that these files have errors and their parser is correct
        // The only way to find out that the parser doesn't like it is to catch
        // a general Exception.

        // If we reach this point then we have no idea of whether the file contains
        // DRM or not.  Return false and hope it is detected elsewhere.

        ret = false;
    } finally {
        // Close before deleting: an open handle can prevent deletion of the temp file.
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ignored) {
                // The answer is already known; a failed close must not change it.
            }
        }
        if (input != null) {
            try {
                input.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the input fails.
            }
        }
        if (tmp != null)
            tmp.delete();
    }
    return ret;
}

From source file:uk.bl.wa.tika.parser.pdf.pdfbox.PDFParser.java

License:Apache License

/**
 * Parses a PDF from the stream, recording its metadata and emitting its content as XHTML.
 *
 * <p>The password is resolved before loading and handed to the loader. It comes from a
 * {@code PasswordProvider} in the parse context, then from the {@code PASSWORD} metadata
 * entry, then defaults to an empty string. Failures are logged and not rethrown; the
 * document and the temporary resources are always released.
 *
 * @param stream the PDF data; the stream itself is not closed by this method
 * @param handler receives the XHTML events
 * @param metadata receives the extracted metadata and may carry the password
 * @param context may supply a {@code PasswordProvider}
 * @throws IOException if closing the document or disposing temporary resources fails
 * @throws SAXException declared for callers; parse-time failures are logged instead
 * @throws TikaException if disposing the temporary resources fails
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    PDDocument pdfDocument = null;
    TemporaryResources tmp = new TemporaryResources();

    try {
        String password = null;

        // Did they supply a new style Password Provider?
        PasswordProvider passwordProvider = (context == null) ? null : context.get(PasswordProvider.class);
        if (passwordProvider != null) {
            password = passwordProvider.getPassword(metadata);
        }

        // Fall back on the old style metadata if set
        if (password == null && metadata.get(PASSWORD) != null) {
            password = metadata.get(PASSWORD);
        }

        // If no password is given, use an empty string as the default
        if (password == null) {
            password = "";
        }

        // The password has to be supplied at load time; resolving it afterwards has no effect.
        pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), password,
                MemoryUsageSetting.setupMixed(100 * 1024 * 1024));

        metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
        extractMetadata(pdfDocument, metadata);
        PDF2XHTML.process(pdfDocument, handler, metadata, extractAnnotationText, enableAutoSpace,
                suppressDuplicateOverlappingText, sortByPosition);
    } catch (Exception e) {
        // Best effort: keep going with whatever was extracted, but keep the stack trace.
        log.error("Exception while parsing PDF: " + e, e);
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
        tmp.dispose();
    }
}

From source file:uk.bl.wa.tika.parser.pdf.pdfbox.PDFParser.java

License:Apache License

/**
 * Copies document-level information from the PDF into the metadata object.
 *
 * <p>In order: the standard information-dictionary fields, every remaining custom
 * information entry, the header version / page count / encryption flag, PDF/A
 * identification from the XMP packet when present, and finally the extension level
 * declared in the catalog's {@code Extensions} dictionary. Later steps overwrite
 * {@code pdf:version} written by earlier ones.
 *
 * @param document the loaded PDF
 * @param metadata target for the extracted values
 * @throws TikaException declared for callers; propagated from the helper calls
 */
private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException {
    PDDocumentInformation docInfo = document.getDocumentInformation();
    Calendar creationDate = docInfo.getCreationDate();

    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    addMetadata(metadata, Metadata.TITLE, docInfo.getTitle());
    addMetadata(metadata, Metadata.AUTHOR, docInfo.getAuthor());
    addMetadata(metadata, Metadata.KEYWORDS, docInfo.getKeywords());
    addMetadata(metadata, "pdf:creator", docInfo.getCreator());
    addMetadata(metadata, "pdf:producer", docInfo.getProducer());
    addMetadata(metadata, Metadata.SUBJECT, docInfo.getSubject());
    addMetadata(metadata, "trapped", docInfo.getTrapped());
    addMetadata(metadata, "created", creationDate);
    addMetadata(metadata, Metadata.CREATION_DATE, creationDate);
    addMetadata(metadata, Metadata.LAST_MODIFIED, docInfo.getModificationDate());

    // Entries already covered above; everything else is custom and copied as-is.
    List<String> standardKeys = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords",
            "Producer", "Subject", "Title", "Trapped");
    COSDictionary infoDictionary = docInfo.getCOSObject();
    if (infoDictionary != null && infoDictionary.keySet() != null) {
        for (COSName key : infoDictionary.keySet()) {
            String name = key.getName();
            if (standardKeys.contains(name)) {
                continue;
            }
            addMetadata(metadata, name, infoDictionary.getDictionaryObject(key));
        }
    }

    // ANJ Extensions: other data of interest.
    metadata.set("pdf:version", "" + document.getDocument().getVersion());
    metadata.set("pdf:numPages", "" + document.getNumberOfPages());
    metadata.set("pdf:encrypted", "" + document.isEncrypted());

    try {
        if (document.getDocumentCatalog().getMetadata() != null) {
            XMPMetadata xmp = XMPMetadata.load(document.getDocumentCatalog().getMetadata().exportXMPMetadata());
            // The PDF schema is fetched but not used yet; xmp:CreatorTool, pdf:Producer
            // and similar could be read from it.
            xmp.getPDFSchema();
            // Register the PDF/A identification schema so it can be looked up by class.
            xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
            XMPSchemaPDFA pdfaSchema = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
            if (pdfaSchema != null) {
                metadata.set("pdfaid:part", pdfaSchema.getPart());
                metadata.set("pdfaid:conformance", pdfaSchema.getConformance());
                String pdfaVersion = "A-" + pdfaSchema.getPart() + pdfaSchema.getConformance().toLowerCase();
                metadata.set("pdf:version", pdfaVersion);
            }
            // TODO WARN if this XMP version is inconsistent with document header version?
        }
    } catch (IOException e) {
        log.error("XMP Parsing failed: " + e);
        metadata.set("pdf:metadata-xmp-parse-failed", "" + e);
    }

    // Attempt to determine Adobe extension level, if present.
    COSDictionary catalog = document.getDocumentCatalog().getCOSObject();
    COSDictionary extensions = (COSDictionary) catalog.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensions == null) {
        return;
    }

    COSName adobePrefix = COSName.getPDFName("ADBE");
    for (COSName extName : extensions.keySet()) {
        if (!extName.equals(adobePrefix)) {
            // An extension that is not Adobe's, and so a 'new' format: record its name.
            metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            continue;
        }

        COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
        if (adobeExt != null) {
            String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
            int level = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
            metadata.set("pdf:version", baseVersion + " Adobe Extension Level " + level);
        }
        // TODO WARN if this embedded version is inconsistent with document header version?
    }
    // End Of ANJ Extensions.
}

From source file:zhaw.PDFIndexer.java

License:Apache License

/**
 * This will add the contents to the lucene document.
 * /*from  w  ww.j av a 2  s .c o m*/
 * @param document
 *            The document to add the contents to.
 * @param is
 *            The stream to get the contents from.
 * @param documentLocation
 *            The location of the document, used just for debug messages.
 * @throws IOException
 *             If there is an error parsing the document.
 */
private void addContent(Document document, InputStream is, String documentLocation) throws IOException {
    PDDocument pdfDocument = null;
    PDFTextStripper stripper;
    try {
        pdfDocument = PDDocument.load(is);
        if (pdfDocument.isEncrypted()) {
            // Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        // create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        stripper = new PDFTextStripper();
        try {
            stripper.writeText(pdfDocument, writer);

        } catch (Exception e) {
            System.out.println("Error in stripper.writeText()");
        }
        String contents = writer.getBuffer().toString();

        StringReader reader = new StringReader(contents);
        addTextField(document, Indexer.contents, reader);
        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null) {
            addTextField(document, Indexer.Author, info.getAuthor());
            try {
                addTextField(document, Indexer.created, info.getCreationDate());
            } catch (IOException io) {
                // ignore, bad date but continue with indexing
            }

            addTextField(document, Indexer.keywords, info.getKeywords());
            try {
                addTextField(document, Indexer.modified, info.getModificationDate());
            } catch (IOException io) {
                // ignore, bad date but continue with indexing
            }
            addTextField(document, "Subject", info.getSubject());
            addTextField(document, Indexer.Title, info.getTitle());
        }
        int summarySize = Math.min(contents.length(), 500);
        String summary = contents.substring(0, summarySize);
        // Add the summary as an UnIndexed field, so that it is stored and
        // returned
        // with hit documents for display.
        addUnindexedField(document, Indexer.summary, summary);
    } catch (CryptographyException e) {
        throw new IOException("Error decrypting document(" + documentLocation + "): " + e);
    } catch (InvalidPasswordException e) {
        // they didn't suppply a password and the default of "" was wrong.
        throw new IOException(
                "Error: The document(" + documentLocation + ") is encrypted and will not be indexed.");
    } finally {
        if (pdfDocument != null) {
            pdfDocument.close();
        }
    }
}