Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of the org.apache.pdfbox.pdmodel.PDDocument load method.

Prototype

public static PDDocument load(byte[] input, String password) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:ShowSignature.java

License:Apache License

private void showSignature(String[] args) throws IOException, CertificateException {
    if (args.length != 2) {
        usage();/*from   w  w w  .  j a v a 2 s  . co  m*/
    } else {
        String password = args[0];
        String infile = args[1];
        PDDocument document = null;
        try {
            document = PDDocument.load(new File(infile), password);
            if (!document.isEncrypted()) {
                System.err.println("Warning: Document is not encrypted.");
            }

            COSDictionary trailer = document.getDocument().getTrailer();
            COSDictionary root = (COSDictionary) trailer.getDictionaryObject(COSName.ROOT);
            COSDictionary acroForm = (COSDictionary) root.getDictionaryObject(COSName.ACRO_FORM);
            COSArray fields = (COSArray) acroForm.getDictionaryObject(COSName.FIELDS);
            for (int i = 0; i < fields.size(); i++) {
                COSDictionary field = (COSDictionary) fields.getObject(i);
                COSName type = field.getCOSName(COSName.FT);
                if (COSName.SIG.equals(type)) {
                    COSDictionary cert = (COSDictionary) field.getDictionaryObject(COSName.V);
                    if (cert != null) {
                        System.out.println("Certificate found");
                        System.out.println("Name=" + cert.getDictionaryObject(COSName.NAME));
                        System.out.println("Modified=" + cert.getDictionaryObject(COSName.M));
                        COSName subFilter = (COSName) cert.getDictionaryObject(COSName.SUB_FILTER);
                        if (subFilter != null) {
                            if (subFilter.getName().equals("adbe.x509.rsa_sha1")) {
                                COSString certString = (COSString) cert
                                        .getDictionaryObject(COSName.getPDFName("Cert"));
                                byte[] certData = certString.getBytes();
                                CertificateFactory factory = CertificateFactory.getInstance("X.509");
                                ByteArrayInputStream certStream = new ByteArrayInputStream(certData);
                                Collection<? extends Certificate> certs = factory
                                        .generateCertificates(certStream);
                                System.out.println("certs=" + certs);
                            } else if (subFilter.getName().equals("adbe.pkcs7.sha1")) {
                                COSString certString = (COSString) cert.getDictionaryObject(COSName.CONTENTS);
                                byte[] certData = certString.getBytes();
                                CertificateFactory factory = CertificateFactory.getInstance("X.509");
                                ByteArrayInputStream certStream = new ByteArrayInputStream(certData);
                                Collection<? extends Certificate> certs = factory
                                        .generateCertificates(certStream);
                                System.out.println("certs=" + certs);
                            } else {
                                System.err.println("Unknown certificate type:" + subFilter);
                            }
                        } else {
                            throw new IOException("Missing subfilter for cert dictionary");
                        }
                    } else {
                        System.out.println("Signature found, but no certificate");
                    }
                }
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:com.ackpdfbox.app.PDFToImage.java

License:Apache License

/**
 * Infamous main method./*from   ww  w.  j  av  a 2s. co m*/
 *
 * @param args Command line arguments, should be one and a reference to a file.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    // suppress the Dock icon on OS X
    System.setProperty("apple.awt.UIElement", "true");

    String password = "";
    String pdfFile = null;
    String outputPrefix = null;
    String imageFormat = "jpg";
    int startPage = 1;
    int endPage = Integer.MAX_VALUE;
    String color = "rgb";
    int dpi;
    float cropBoxLowerLeftX = 0;
    float cropBoxLowerLeftY = 0;
    float cropBoxUpperRightX = 0;
    float cropBoxUpperRightY = 0;
    boolean showTime = false;
    try {
        dpi = Toolkit.getDefaultToolkit().getScreenResolution();
    } catch (HeadlessException e) {
        dpi = 96;
    }
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals(PASSWORD)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            password = args[i];
        } else if (args[i].equals(START_PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            startPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(END_PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            endPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            startPage = Integer.parseInt(args[i]);
            endPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(IMAGE_TYPE) || args[i].equals(FORMAT)) {
            i++;
            imageFormat = args[i];
        } else if (args[i].equals(OUTPUT_PREFIX) || args[i].equals(PREFIX)) {
            i++;
            outputPrefix = args[i];
        } else if (args[i].equals(COLOR)) {
            i++;
            color = args[i];
        } else if (args[i].equals(RESOLUTION) || args[i].equals(DPI)) {
            i++;
            dpi = Integer.parseInt(args[i]);
        } else if (args[i].equals(CROPBOX)) {
            i++;
            cropBoxLowerLeftX = Float.valueOf(args[i]);
            i++;
            cropBoxLowerLeftY = Float.valueOf(args[i]);
            i++;
            cropBoxUpperRightX = Float.valueOf(args[i]);
            i++;
            cropBoxUpperRightY = Float.valueOf(args[i]);
        } else if (args[i].equals(TIME)) {
            showTime = true;
        } else {
            if (pdfFile == null) {
                pdfFile = args[i];
            }
        }
    }
    if (pdfFile == null) {
        usage();
    } else {
        if (outputPrefix == null) {
            outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.'));
        }

        PDDocument document = null;
        try {
            document = PDDocument.load(new File(pdfFile), password);

            ImageType imageType = null;
            if ("bilevel".equalsIgnoreCase(color)) {
                imageType = ImageType.BINARY;
            } else if ("gray".equalsIgnoreCase(color)) {
                imageType = ImageType.GRAY;
            } else if ("rgb".equalsIgnoreCase(color)) {
                imageType = ImageType.RGB;
            } else if ("rgba".equalsIgnoreCase(color)) {
                imageType = ImageType.ARGB;
            }

            if (imageType == null) {
                System.err.println("Error: Invalid color.");
                System.exit(2);
            }

            //if a CropBox has been specified, update the CropBox:
            //changeCropBoxes(PDDocument document,float a, float b, float c,float d)
            if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0
                    || cropBoxUpperRightY != 0) {
                changeCropBox(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX,
                        cropBoxUpperRightY);
            }

            long startTime = System.nanoTime();

            // render the pages
            boolean success = true;
            endPage = Math.min(endPage, document.getNumberOfPages());
            PDFRenderer renderer = new PDFRenderer(document);
            for (int i = startPage - 1; i < endPage; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType);
                String fileName = outputPrefix + (i + 1) + "." + imageFormat;
                success &= ImageIOUtil.writeImage(image, fileName, dpi);
            }

            // performance stats
            long endTime = System.nanoTime();
            long duration = endTime - startTime;
            int count = 1 + endPage - startPage;
            if (showTime) {
                System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s",
                        duration / 1000000);
            }

            if (!success) {
                System.err.println("Error: no writer found for image format '" + imageFormat + "'");
                System.exit(1);
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:com.jaeksoft.searchlib.parser.PdfParser.java

License:Open Source License

@Override
protected void parseContent(StreamLimiter streamLimiter, final LanguageEnum lang) throws IOException {
    PdfOcrContext context = new PdfOcrContext();
    context.lang = lang;/* w  w  w  . j a  v a2  s .  co  m*/
    String fileName = null;
    try {
        String ghostScriptBinaryPath = getStringProperty(ClassPropertyEnum.GHOSTSCRIPT_BINARYPATH);
        context.ghostScript = StringUtils.isEmpty(ghostScriptBinaryPath) ? null
                : new GhostScript(ghostScriptBinaryPath);
        fileName = streamLimiter.getFile().getName();
        context.pdfFile = streamLimiter.getFile();
        context.pdf = PDDocument.load(context.pdfFile, null);
        try {
            if (context.pdf.isEncrypted())
                context.pdfPassword = decrypt(context.pdf, context.pdfFile);
        } catch (Exception e) {
            Logging.warn("PDFBox decryption failed " + fileName);
            IOUtils.closeQuietly(context.pdf);
            context.pdf = null;
        }
        ParserResultItem result = getNewParserResultItem();
        result.addField(ParserFieldEnum.pdfcrack_password, context.pdfPassword);
        if (context.pdf != null)
            extractMetaData(result, context.pdf);
        int charCount = 0;
        if (context.ghostScript == null) {
            if (context.pdf != null)
                charCount = extractTextContent(result, context.pdf);
        } else
            charCount = extractTextContent(result, context);
        if (charCount == 0 && context.pdf != null)
            extractImagesForOCR(result, context);
        result.langDetection(10000, ParserFieldEnum.content);
    } catch (SearchLibException e) {
        throw new IOException("Failed on " + fileName, e);
    } catch (InterruptedException e) {
        throw new IOException("Failed on " + fileName, e);
    } catch (java.util.concurrent.ExecutionException e) {
        throw new IOException("Failed on " + fileName, e);
    } finally {
        if (context.pdf != null)
            context.pdf.close();
    }
}

From source file:com.qwazr.library.pdfbox.PdfBoxParser.java

License:Apache License

@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {
    parseContent(PDDocument.load(inputStream, getPassword(parameters)), resultBuilder);
}

From source file:com.qwazr.library.pdfbox.PdfBoxParser.java

License:Apache License

@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final Path filePath, String extension,
        final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {
    parseContent(PDDocument.load(filePath.toFile(), getPassword(parameters)), resultBuilder);
}

From source file:com.trollworks.gcs.pdfview.PdfDockable.java

License:Open Source License

public PdfDockable(PdfRef pdfRef, int page, String highlight) {
    super(new BorderLayout());
    mFile = pdfRef.getFile();//w w w . ja  va 2  s . c  o m
    int pageCount = 9999;
    try {
        mPdf = PDDocument.load(pdfRef.getFile(), MemoryUsageSetting.setupMixed(50 * 1024 * 1024));
        pageCount = mPdf.getNumberOfPages();
    } catch (Exception exception) {
        Log.error(exception);
    }
    mToolbar = new Toolbar();

    mZoomInButton = new IconButton(StdImage.get("ZoomIn"), //$NON-NLS-1$
            formatWithKey(SCALE_DOC_UP, KeyStroke.getKeyStroke('=')), () -> mPanel.zoomIn());
    mToolbar.add(mZoomInButton);
    mZoomOutButton = new IconButton(StdImage.get("ZoomOut"), //$NON-NLS-1$
            formatWithKey(SCALE_DOC_DOWN, KeyStroke.getKeyStroke('-')), () -> mPanel.zoomOut());
    mToolbar.add(mZoomOutButton);
    mActualSizeButton = new IconButton(StdImage.get("ActualSize"), //$NON-NLS-1$
            formatWithKey(ACTUAL_SIZE, KeyStroke.getKeyStroke('1')), () -> mPanel.actualSize());
    mToolbar.add(mActualSizeButton);
    mZoomStatus = new JLabel("100%"); //$NON-NLS-1$
    mToolbar.add(mZoomStatus);

    mPageField = new EditorField(new DefaultFormatterFactory(new IntegerFormatter(1, pageCount, false)),
            event -> {
                if (mPanel != null) {
                    int pageIndex = ((Integer) mPageField.getValue()).intValue() - 1;
                    int newPageIndex = mPanel.goToPageIndex(pageIndex, null);
                    if (pageIndex != newPageIndex) {
                        mPageField.setValue(Integer.valueOf(newPageIndex + 1));
                    } else {
                        mPanel.requestFocusInWindow();
                    }
                }
            }, SwingConstants.RIGHT, Integer.valueOf(page), Integer.valueOf(9999), null);
    mToolbar.add(mPageField, Toolbar.LAYOUT_EXTRA_BEFORE);
    mPageStatus = new JLabel("/ -"); //$NON-NLS-1$
    mToolbar.add(mPageStatus);
    mPreviousPageButton = new IconButton(StdImage.get("PageUp"), //$NON-NLS-1$
            formatWithKey(PREVIOUS_PAGE, KeyStroke.getKeyStroke(KeyEvent.VK_UP, 0)),
            () -> mPanel.previousPage());
    mToolbar.add(mPreviousPageButton);
    mNextPageButton = new IconButton(StdImage.get("PageDown"), //$NON-NLS-1$
            formatWithKey(NEXT_PAGE, KeyStroke.getKeyStroke(KeyEvent.VK_DOWN, 0)), () -> mPanel.nextPage());
    mToolbar.add(mNextPageButton);

    add(mToolbar, BorderLayout.NORTH);
    mPanel = new PdfPanel(this, mPdf, pdfRef, page, highlight);
    add(new JScrollPane(mPanel), BorderLayout.CENTER);

    setFocusCycleRoot(true);
    setFocusTraversalPolicy(new DefaultFocusTraversalPolicy());
}

From source file:com.yiyihealth.util.PDF2Image.java

License:Apache License

/**
 * Infamous main method./*  ww  w. j  ava 2 s  .c  o m*/
 *
 * @param args Command line arguments, should be one and a reference to a file.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    // suppress the Dock icon on OS X
    System.setProperty("apple.awt.UIElement", "true");

    String password = "";
    String pdfFile = null;
    String outputPrefix = null;
    String imageFormat = "jpg";
    int startPage = 1;
    int endPage = Integer.MAX_VALUE;
    String color = "rgb";
    int dpi;
    float cropBoxLowerLeftX = 0;
    float cropBoxLowerLeftY = 0;
    float cropBoxUpperRightX = 0;
    float cropBoxUpperRightY = 0;
    boolean showTime = false;
    try {
        dpi = Toolkit.getDefaultToolkit().getScreenResolution();
    } catch (HeadlessException e) {
        dpi = 96;
    }
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals(PASSWORD)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            password = args[i];
        } else if (args[i].equals(START_PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            startPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(END_PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            endPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(PAGE)) {
            i++;
            if (i >= args.length) {
                usage();
            }
            startPage = Integer.parseInt(args[i]);
            endPage = Integer.parseInt(args[i]);
        } else if (args[i].equals(IMAGE_TYPE) || args[i].equals(FORMAT)) {
            i++;
            imageFormat = args[i];
        } else if (args[i].equals(OUTPUT_PREFIX) || args[i].equals(PREFIX)) {
            i++;
            outputPrefix = args[i];
        } else if (args[i].equals(COLOR)) {
            i++;
            color = args[i];
        } else if (args[i].equals(RESOLUTION) || args[i].equals(DPI)) {
            i++;
            dpi = Integer.parseInt(args[i]);
        } else if (args[i].equals(CROPBOX)) {
            i++;
            cropBoxLowerLeftX = Float.valueOf(args[i]);
            i++;
            cropBoxLowerLeftY = Float.valueOf(args[i]);
            i++;
            cropBoxUpperRightX = Float.valueOf(args[i]);
            i++;
            cropBoxUpperRightY = Float.valueOf(args[i]);
        } else if (args[i].equals(TIME)) {
            showTime = true;
        } else {
            if (pdfFile == null) {
                pdfFile = args[i];
            }
        }
    }
    if (pdfFile == null) {
        usage();
    } else {
        if (outputPrefix == null) {
            outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.'));
        }

        PDDocument document = null;
        try {
            document = PDDocument.load(new File(pdfFile), password);

            ImageType imageType = null;
            if ("bilevel".equalsIgnoreCase(color)) {
                imageType = ImageType.BINARY;
            } else if ("gray".equalsIgnoreCase(color)) {
                imageType = ImageType.GRAY;
            } else if ("rgb".equalsIgnoreCase(color)) {
                imageType = ImageType.RGB;
            } else if ("rgba".equalsIgnoreCase(color)) {
                imageType = ImageType.ARGB;
            }

            if (imageType == null) {
                System.err.println("Error: Invalid color.");
                System.exit(2);
            }

            //if a CropBox has been specified, update the CropBox:
            //changeCropBoxes(PDDocument document,float a, float b, float c,float d)
            if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0
                    || cropBoxUpperRightY != 0) {
                changeCropBox(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX,
                        cropBoxUpperRightY);
            }

            long startTime = System.nanoTime();

            // render the pages
            boolean success = true;
            endPage = Math.min(endPage, document.getNumberOfPages());
            PDFRenderer renderer = new PDFRenderer(document);
            for (int i = startPage - 1; i < endPage; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType);
                String fileName = outputPrefix + "_" + (i + 1) + "." + imageFormat;
                success &= ImageIOUtil.writeImage(image, fileName, dpi);
            }

            // performance stats
            long endTime = System.nanoTime();
            long duration = endTime - startTime;
            int count = 1 + endPage - startPage;
            if (showTime) {
                System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s",
                        duration / 1000000);
            }

            if (!success) {
                System.err.println("Error: no writer found for image format '" + imageFormat + "'");
                System.exit(1);
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:cz.incad.kramerius.k5indexer.KrameriusPDFDocument.java

private void setDocument() throws Exception {
    try {/*from   w w w. j  a  v  a2  s  .c om*/
        closeDocument();
        PDDocument pdDocument = PDDocument.load(stream,
                KConfiguration.getInstance().getConfiguration().getString("convert.pdfPassword"));
    } catch (Exception ex) {
        closeDocument();
        logger.log(Level.WARNING, "Cannot parse PDF document", ex);
    }

}

From source file:de.catma.document.source.contenthandler.PDFContentHandler.java

License:Open Source License

public void load(InputStream is) throws IOException {
    PDDocument document = null;/* ww  w. j  a  va2s  . co  m*/
    try {
        document = PDDocument.load(is, false);

        if (document.isEncrypted()) {
            throw new IOException("can not open pdf document because it is encrypted");
        }

        AccessPermission ap = document.getCurrentAccessPermission();
        if (!ap.canExtractContent()) {
            throw new IOException("You do not have permission to extract text");
        }

        PDFTextStripper stripper = new PDFTextStripper("UTF-8");

        stripper.setForceParsing(false);
        stripper.setSortByPosition(false);
        stripper.setShouldSeparateByBeads(true);
        stripper.setStartPage(1);
        stripper.setEndPage(Integer.MAX_VALUE);

        ByteArrayOutputStream os = new ByteArrayOutputStream();
        Writer w = new OutputStreamWriter(os);
        try {
            stripper.writeText(document, w);
        } finally {
            w.close();
        }
        // some pdfs seem to include non valid unicode characters
        // and this causes problems when converting text to HTML
        // for GUI delivery and during indexing 
        setContent(os.toString().replaceAll("[^\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\u10000-\\u10FFFF]",
                "?"));
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:dk.defxws.fedoragsearch.server.TransformerToText.java

License:Open Source License

public int getPdfPagesCount_(byte[] doc) throws Exception {
    String password = "";
    PDDocument pdDoc = null;//from  w w w .jav  a  2s  .c om
    // extract PDF document's textual content
    try {
        pdDoc = PDDocument.load(new ByteArrayInputStream(doc), password);
        return pdDoc.getNumberOfPages();
    } catch (Exception e) {
        throw new Exception("Cannot parse PDF document", e);
    } finally {
        closePDDocument(pdDoc);
    }
}