List of usage examples for the org.apache.pdfbox.pdmodel.PDDocument.load method
public static PDDocument load(byte[] input, String password) throws IOException
From source file:ShowSignature.java
License:Apache License
/**
 * Opens the PDF named on the command line and prints details of every
 * signature field listed in the document's AcroForm.
 *
 * <p>For each field whose type is {@code Sig} the signer name and modification
 * entry are printed, followed by the X.509 certificates decoded from the
 * signature dictionary. A document without an AcroForm or without a field
 * array simply produces no signature output.
 *
 * @param args exactly two entries: the document password, then the PDF path;
 *             any other count only triggers {@code usage()}
 * @throws IOException if the document cannot be loaded, a signature dictionary
 *                     has no sub filter, or its certificate entry is missing
 * @throws CertificateException if the embedded certificate data cannot be parsed
 */
private void showSignature(String[] args) throws IOException, CertificateException {
    if (args.length != 2) {
        usage();
        return;
    }

    String password = args[0];
    String infile = args[1];
    PDDocument document = null;
    try {
        document = PDDocument.load(new File(infile), password);
        if (!document.isEncrypted()) {
            System.err.println("Warning: Document is not encrypted.");
        }

        COSDictionary trailer = document.getDocument().getTrailer();
        COSDictionary root = (COSDictionary) trailer.getDictionaryObject(COSName.ROOT);
        // Plain PDFs frequently have no AcroForm (or an AcroForm without Fields);
        // treat that as "nothing to show" instead of failing with a NullPointerException.
        COSDictionary acroForm = root == null ? null
                : (COSDictionary) root.getDictionaryObject(COSName.ACRO_FORM);
        COSArray fields = acroForm == null ? null
                : (COSArray) acroForm.getDictionaryObject(COSName.FIELDS);
        if (fields == null) {
            return;
        }

        for (int i = 0; i < fields.size(); i++) {
            Object entry = fields.getObject(i);
            if (!(entry instanceof COSDictionary)) {
                // Malformed field entry: skip it rather than abort on a ClassCastException.
                continue;
            }
            COSDictionary field = (COSDictionary) entry;
            if (!COSName.SIG.equals(field.getCOSName(COSName.FT))) {
                continue;
            }

            COSDictionary cert = (COSDictionary) field.getDictionaryObject(COSName.V);
            if (cert == null) {
                System.out.println("Signature found, but no certificate");
                continue;
            }

            System.out.println("Certificate found");
            System.out.println("Name=" + cert.getDictionaryObject(COSName.NAME));
            System.out.println("Modified=" + cert.getDictionaryObject(COSName.M));

            COSName subFilter = (COSName) cert.getDictionaryObject(COSName.SUB_FILTER);
            if (subFilter == null) {
                throw new IOException("Missing subfilter for cert dictionary");
            }

            String subFilterName = subFilter.getName();
            if (subFilterName.equals("adbe.x509.rsa_sha1")) {
                printCertificates(cert, COSName.getPDFName("Cert"));
            } else if (subFilterName.equals("adbe.pkcs7.sha1")) {
                printCertificates(cert, COSName.CONTENTS);
            } else {
                System.err.println("Unknown certificate type:" + subFilter);
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

/**
 * Decodes the certificate data stored under {@code key} in a signature
 * dictionary and prints the resulting certificates.
 *
 * @param signature the signature dictionary (the field's {@code V} entry)
 * @param key the dictionary key holding the certificate bytes
 * @throws IOException if the dictionary has no entry for {@code key}
 * @throws CertificateException if the bytes cannot be parsed as X.509 certificates
 */
private static void printCertificates(COSDictionary signature, COSName key)
        throws IOException, CertificateException {
    COSString certString = (COSString) signature.getDictionaryObject(key);
    if (certString == null) {
        throw new IOException("Missing certificate data in entry " + key.getName());
    }
    byte[] certData = certString.getBytes();
    CertificateFactory factory = CertificateFactory.getInstance("X.509");
    ByteArrayInputStream certStream = new ByteArrayInputStream(certData);
    Collection<? extends Certificate> certs = factory.generateCertificates(certStream);
    System.out.println("certs=" + certs);
}
From source file:com.ackpdfbox.app.PDFToImage.java
License:Apache License
/** * Infamous main method./*from ww w. j av a 2s. co m*/ * * @param args Command line arguments, should be one and a reference to a file. * * @throws IOException If there is an error parsing the document. */ public static void main(String[] args) throws IOException { // suppress the Dock icon on OS X System.setProperty("apple.awt.UIElement", "true"); String password = ""; String pdfFile = null; String outputPrefix = null; String imageFormat = "jpg"; int startPage = 1; int endPage = Integer.MAX_VALUE; String color = "rgb"; int dpi; float cropBoxLowerLeftX = 0; float cropBoxLowerLeftY = 0; float cropBoxUpperRightX = 0; float cropBoxUpperRightY = 0; boolean showTime = false; try { dpi = Toolkit.getDefaultToolkit().getScreenResolution(); } catch (HeadlessException e) { dpi = 96; } for (int i = 0; i < args.length; i++) { if (args[i].equals(PASSWORD)) { i++; if (i >= args.length) { usage(); } password = args[i]; } else if (args[i].equals(START_PAGE)) { i++; if (i >= args.length) { usage(); } startPage = Integer.parseInt(args[i]); } else if (args[i].equals(END_PAGE)) { i++; if (i >= args.length) { usage(); } endPage = Integer.parseInt(args[i]); } else if (args[i].equals(PAGE)) { i++; if (i >= args.length) { usage(); } startPage = Integer.parseInt(args[i]); endPage = Integer.parseInt(args[i]); } else if (args[i].equals(IMAGE_TYPE) || args[i].equals(FORMAT)) { i++; imageFormat = args[i]; } else if (args[i].equals(OUTPUT_PREFIX) || args[i].equals(PREFIX)) { i++; outputPrefix = args[i]; } else if (args[i].equals(COLOR)) { i++; color = args[i]; } else if (args[i].equals(RESOLUTION) || args[i].equals(DPI)) { i++; dpi = Integer.parseInt(args[i]); } else if (args[i].equals(CROPBOX)) { i++; cropBoxLowerLeftX = Float.valueOf(args[i]); i++; cropBoxLowerLeftY = Float.valueOf(args[i]); i++; cropBoxUpperRightX = Float.valueOf(args[i]); i++; cropBoxUpperRightY = Float.valueOf(args[i]); } else if (args[i].equals(TIME)) { showTime = true; } else { if (pdfFile == null) { pdfFile = 
args[i]; } } } if (pdfFile == null) { usage(); } else { if (outputPrefix == null) { outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.')); } PDDocument document = null; try { document = PDDocument.load(new File(pdfFile), password); ImageType imageType = null; if ("bilevel".equalsIgnoreCase(color)) { imageType = ImageType.BINARY; } else if ("gray".equalsIgnoreCase(color)) { imageType = ImageType.GRAY; } else if ("rgb".equalsIgnoreCase(color)) { imageType = ImageType.RGB; } else if ("rgba".equalsIgnoreCase(color)) { imageType = ImageType.ARGB; } if (imageType == null) { System.err.println("Error: Invalid color."); System.exit(2); } //if a CropBox has been specified, update the CropBox: //changeCropBoxes(PDDocument document,float a, float b, float c,float d) if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0 || cropBoxUpperRightY != 0) { changeCropBox(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX, cropBoxUpperRightY); } long startTime = System.nanoTime(); // render the pages boolean success = true; endPage = Math.min(endPage, document.getNumberOfPages()); PDFRenderer renderer = new PDFRenderer(document); for (int i = startPage - 1; i < endPage; i++) { BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType); String fileName = outputPrefix + (i + 1) + "." + imageFormat; success &= ImageIOUtil.writeImage(image, fileName, dpi); } // performance stats long endTime = System.nanoTime(); long duration = endTime - startTime; int count = 1 + endPage - startPage; if (showTime) { System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s", duration / 1000000); } if (!success) { System.err.println("Error: no writer found for image format '" + imageFormat + "'"); System.exit(1); } } finally { if (document != null) { document.close(); } } } }
From source file:com.jaeksoft.searchlib.parser.PdfParser.java
License:Open Source License
/**
 * Parses the PDF file behind {@code streamLimiter}: records the password found
 * by {@code decrypt} (if the document is encrypted), extracts metadata and text,
 * falls back to image extraction for OCR when no characters were extracted,
 * and finally runs language detection on the content field.
 *
 * @param streamLimiter provides the PDF file to parse
 * @param lang language stored in the OCR context
 * @throws IOException if loading or closing the document fails, or if a
 *                     {@code SearchLibException}, {@code InterruptedException}
 *                     or {@code ExecutionException} occurs while parsing
 *                     (wrapped, with the file name in the message)
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, final LanguageEnum lang) throws IOException {
    PdfOcrContext context = new PdfOcrContext();
    context.lang = lang;
    String fileName = null;
    try {
        String ghostScriptBinaryPath = getStringProperty(ClassPropertyEnum.GHOSTSCRIPT_BINARYPATH);
        context.ghostScript = StringUtils.isEmpty(ghostScriptBinaryPath) ? null
                : new GhostScript(ghostScriptBinaryPath);
        fileName = streamLimiter.getFile().getName();
        context.pdfFile = streamLimiter.getFile();
        context.pdf = PDDocument.load(context.pdfFile, null);
        try {
            if (context.pdf.isEncrypted())
                context.pdfPassword = decrypt(context.pdf, context.pdfFile);
        } catch (Exception e) {
            // Decryption is best effort: drop the PDFBox document and continue
            // with whatever can still be extracted without it.
            Logging.warn("PDFBox decryption failed " + fileName);
            IOUtils.closeQuietly(context.pdf);
            context.pdf = null;
        }
        ParserResultItem result = getNewParserResultItem();
        result.addField(ParserFieldEnum.pdfcrack_password, context.pdfPassword);
        if (context.pdf != null)
            extractMetaData(result, context.pdf);
        int charCount = 0;
        if (context.ghostScript == null) {
            if (context.pdf != null)
                charCount = extractTextContent(result, context.pdf);
        } else
            charCount = extractTextContent(result, context);
        // No text found: hand the page images to OCR.
        if (charCount == 0 && context.pdf != null)
            extractImagesForOCR(result, context);
        result.langDetection(10000, ParserFieldEnum.content);
    } catch (SearchLibException e) {
        throw new IOException("Failed on " + fileName, e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can still observe
        // the interruption after it has been converted to an IOException.
        Thread.currentThread().interrupt();
        throw new IOException("Failed on " + fileName, e);
    } catch (java.util.concurrent.ExecutionException e) {
        throw new IOException("Failed on " + fileName, e);
    } finally {
        if (context.pdf != null)
            context.pdf.close();
    }
}
From source file:com.qwazr.library.pdfbox.PdfBoxParser.java
License:Apache License
/**
 * Loads a PDF from the given stream, using the password obtained from the
 * parameters via {@code getPassword}, and hands the document to the
 * document-based {@code parseContent} overload.
 *
 * @param parameters request parameters passed to {@code getPassword}
 * @param inputStream stream delivering the PDF bytes
 * @param extension not used by this method
 * @param mimeType not used by this method
 * @param resultBuilder receives the parsing result
 * @throws Exception if loading or parsing fails
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {
    final PDDocument document = PDDocument.load(inputStream, getPassword(parameters));
    parseContent(document, resultBuilder);
}
From source file:com.qwazr.library.pdfbox.PdfBoxParser.java
License:Apache License
/**
 * Loads a PDF from the given path, using the password obtained from the
 * parameters via {@code getPassword}, and hands the document to the
 * document-based {@code parseContent} overload.
 *
 * @param parameters request parameters passed to {@code getPassword}
 * @param filePath location of the PDF file
 * @param extension not used by this method
 * @param mimeType not used by this method
 * @param resultBuilder receives the parsing result
 * @throws Exception if loading or parsing fails
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final Path filePath,
        String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {
    final PDDocument document = PDDocument.load(filePath.toFile(), getPassword(parameters));
    parseContent(document, resultBuilder);
}
From source file:com.trollworks.gcs.pdfview.PdfDockable.java
License:Open Source License
public PdfDockable(PdfRef pdfRef, int page, String highlight) { super(new BorderLayout()); mFile = pdfRef.getFile();//w w w . ja va 2 s . c o m int pageCount = 9999; try { mPdf = PDDocument.load(pdfRef.getFile(), MemoryUsageSetting.setupMixed(50 * 1024 * 1024)); pageCount = mPdf.getNumberOfPages(); } catch (Exception exception) { Log.error(exception); } mToolbar = new Toolbar(); mZoomInButton = new IconButton(StdImage.get("ZoomIn"), //$NON-NLS-1$ formatWithKey(SCALE_DOC_UP, KeyStroke.getKeyStroke('=')), () -> mPanel.zoomIn()); mToolbar.add(mZoomInButton); mZoomOutButton = new IconButton(StdImage.get("ZoomOut"), //$NON-NLS-1$ formatWithKey(SCALE_DOC_DOWN, KeyStroke.getKeyStroke('-')), () -> mPanel.zoomOut()); mToolbar.add(mZoomOutButton); mActualSizeButton = new IconButton(StdImage.get("ActualSize"), //$NON-NLS-1$ formatWithKey(ACTUAL_SIZE, KeyStroke.getKeyStroke('1')), () -> mPanel.actualSize()); mToolbar.add(mActualSizeButton); mZoomStatus = new JLabel("100%"); //$NON-NLS-1$ mToolbar.add(mZoomStatus); mPageField = new EditorField(new DefaultFormatterFactory(new IntegerFormatter(1, pageCount, false)), event -> { if (mPanel != null) { int pageIndex = ((Integer) mPageField.getValue()).intValue() - 1; int newPageIndex = mPanel.goToPageIndex(pageIndex, null); if (pageIndex != newPageIndex) { mPageField.setValue(Integer.valueOf(newPageIndex + 1)); } else { mPanel.requestFocusInWindow(); } } }, SwingConstants.RIGHT, Integer.valueOf(page), Integer.valueOf(9999), null); mToolbar.add(mPageField, Toolbar.LAYOUT_EXTRA_BEFORE); mPageStatus = new JLabel("/ -"); //$NON-NLS-1$ mToolbar.add(mPageStatus); mPreviousPageButton = new IconButton(StdImage.get("PageUp"), //$NON-NLS-1$ formatWithKey(PREVIOUS_PAGE, KeyStroke.getKeyStroke(KeyEvent.VK_UP, 0)), () -> mPanel.previousPage()); mToolbar.add(mPreviousPageButton); mNextPageButton = new IconButton(StdImage.get("PageDown"), //$NON-NLS-1$ formatWithKey(NEXT_PAGE, KeyStroke.getKeyStroke(KeyEvent.VK_DOWN, 0)), () -> mPanel.nextPage()); 
mToolbar.add(mNextPageButton); add(mToolbar, BorderLayout.NORTH); mPanel = new PdfPanel(this, mPdf, pdfRef, page, highlight); add(new JScrollPane(mPanel), BorderLayout.CENTER); setFocusCycleRoot(true); setFocusTraversalPolicy(new DefaultFocusTraversalPolicy()); }
From source file:com.yiyihealth.util.PDF2Image.java
License:Apache License
/** * Infamous main method./* ww w. j ava 2 s .c o m*/ * * @param args Command line arguments, should be one and a reference to a file. * * @throws IOException If there is an error parsing the document. */ public static void main(String[] args) throws IOException { // suppress the Dock icon on OS X System.setProperty("apple.awt.UIElement", "true"); String password = ""; String pdfFile = null; String outputPrefix = null; String imageFormat = "jpg"; int startPage = 1; int endPage = Integer.MAX_VALUE; String color = "rgb"; int dpi; float cropBoxLowerLeftX = 0; float cropBoxLowerLeftY = 0; float cropBoxUpperRightX = 0; float cropBoxUpperRightY = 0; boolean showTime = false; try { dpi = Toolkit.getDefaultToolkit().getScreenResolution(); } catch (HeadlessException e) { dpi = 96; } for (int i = 0; i < args.length; i++) { if (args[i].equals(PASSWORD)) { i++; if (i >= args.length) { usage(); } password = args[i]; } else if (args[i].equals(START_PAGE)) { i++; if (i >= args.length) { usage(); } startPage = Integer.parseInt(args[i]); } else if (args[i].equals(END_PAGE)) { i++; if (i >= args.length) { usage(); } endPage = Integer.parseInt(args[i]); } else if (args[i].equals(PAGE)) { i++; if (i >= args.length) { usage(); } startPage = Integer.parseInt(args[i]); endPage = Integer.parseInt(args[i]); } else if (args[i].equals(IMAGE_TYPE) || args[i].equals(FORMAT)) { i++; imageFormat = args[i]; } else if (args[i].equals(OUTPUT_PREFIX) || args[i].equals(PREFIX)) { i++; outputPrefix = args[i]; } else if (args[i].equals(COLOR)) { i++; color = args[i]; } else if (args[i].equals(RESOLUTION) || args[i].equals(DPI)) { i++; dpi = Integer.parseInt(args[i]); } else if (args[i].equals(CROPBOX)) { i++; cropBoxLowerLeftX = Float.valueOf(args[i]); i++; cropBoxLowerLeftY = Float.valueOf(args[i]); i++; cropBoxUpperRightX = Float.valueOf(args[i]); i++; cropBoxUpperRightY = Float.valueOf(args[i]); } else if (args[i].equals(TIME)) { showTime = true; } else { if (pdfFile == null) { pdfFile = args[i]; } 
} } if (pdfFile == null) { usage(); } else { if (outputPrefix == null) { outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.')); } PDDocument document = null; try { document = PDDocument.load(new File(pdfFile), password); ImageType imageType = null; if ("bilevel".equalsIgnoreCase(color)) { imageType = ImageType.BINARY; } else if ("gray".equalsIgnoreCase(color)) { imageType = ImageType.GRAY; } else if ("rgb".equalsIgnoreCase(color)) { imageType = ImageType.RGB; } else if ("rgba".equalsIgnoreCase(color)) { imageType = ImageType.ARGB; } if (imageType == null) { System.err.println("Error: Invalid color."); System.exit(2); } //if a CropBox has been specified, update the CropBox: //changeCropBoxes(PDDocument document,float a, float b, float c,float d) if (cropBoxLowerLeftX != 0 || cropBoxLowerLeftY != 0 || cropBoxUpperRightX != 0 || cropBoxUpperRightY != 0) { changeCropBox(document, cropBoxLowerLeftX, cropBoxLowerLeftY, cropBoxUpperRightX, cropBoxUpperRightY); } long startTime = System.nanoTime(); // render the pages boolean success = true; endPage = Math.min(endPage, document.getNumberOfPages()); PDFRenderer renderer = new PDFRenderer(document); for (int i = startPage - 1; i < endPage; i++) { BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType); String fileName = outputPrefix + "_" + (i + 1) + "." + imageFormat; success &= ImageIOUtil.writeImage(image, fileName, dpi); } // performance stats long endTime = System.nanoTime(); long duration = endTime - startTime; int count = 1 + endPage - startPage; if (showTime) { System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s", duration / 1000000); } if (!success) { System.err.println("Error: no writer found for image format '" + imageFormat + "'"); System.exit(1); } } finally { if (document != null) { document.close(); } } } }
From source file:cz.incad.kramerius.k5indexer.KrameriusPDFDocument.java
/**
 * Closes the current document and loads a PDF from {@code stream}, using the
 * password configured under {@code convert.pdfPassword}.
 *
 * <p>Any failure is logged as a warning after closing the document again; it
 * is not propagated to the caller.
 *
 * @throws Exception declared by the signature; failures inside the
 *                   {@code try} block are caught and logged
 */
private void setDocument() throws Exception {
    try {
        closeDocument();
        final String pdfPassword = KConfiguration.getInstance().getConfiguration()
                .getString("convert.pdfPassword");
        // NOTE(review): the loaded document is bound only to this local, so it is
        // neither kept for later use nor closed here. This looks like it was meant
        // to be assigned to an instance field - confirm against the class.
        PDDocument loaded = PDDocument.load(stream, pdfPassword);
    } catch (Exception parseFailure) {
        closeDocument();
        logger.log(Level.WARNING, "Cannot parse PDF document", parseFailure);
    }
}
From source file:de.catma.document.source.contenthandler.PDFContentHandler.java
License:Open Source License
public void load(InputStream is) throws IOException { PDDocument document = null;/* ww w. j a va2s . co m*/ try { document = PDDocument.load(is, false); if (document.isEncrypted()) { throw new IOException("can not open pdf document because it is encrypted"); } AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract text"); } PDFTextStripper stripper = new PDFTextStripper("UTF-8"); stripper.setForceParsing(false); stripper.setSortByPosition(false); stripper.setShouldSeparateByBeads(true); stripper.setStartPage(1); stripper.setEndPage(Integer.MAX_VALUE); ByteArrayOutputStream os = new ByteArrayOutputStream(); Writer w = new OutputStreamWriter(os); try { stripper.writeText(document, w); } finally { w.close(); } // some pdfs seem to include non valid unicode characters // and this causes problems when converting text to HTML // for GUI delivery and during indexing setContent(os.toString().replaceAll("[^\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\u10000-\\u10FFFF]", "?")); } finally { if (document != null) { document.close(); } } }
From source file:dk.defxws.fedoragsearch.server.TransformerToText.java
License:Open Source License
public int getPdfPagesCount_(byte[] doc) throws Exception { String password = ""; PDDocument pdDoc = null;//from w w w .jav a 2s .c om // extract PDF document's textual content try { pdDoc = PDDocument.load(new ByteArrayInputStream(doc), password); return pdDoc.getNumberOfPages(); } catch (Exception e) { throw new Exception("Cannot parse PDF document", e); } finally { closePDDocument(pdDoc); } }