List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:com.aaasec.sigserv.csspsupport.pdfbox.ReplaceSignature.java
License:EUPL
public File resignPDF() throws IOException, COSVisitorException, SignatureException { File document = new File(model.getOriginalLoc()); byte[] buffer = new byte[8 * 1024]; if (document == null || !document.exists()) { throw new RuntimeException("Document for signing does not exist"); }// ww w. j a va 2 s . co m // creating output document and prepare the IO streams. String name = document.getName(); String substring = name.substring(0, name.lastIndexOf(".")); File outputDocument = new File(model.getSignedLoc()); FileInputStream fis = new FileInputStream(document); FileOutputStream fos = new FileOutputStream(outputDocument); int c; while ((c = fis.read(buffer)) != -1) { fos.write(buffer, 0, c); } fis.close(); fis = new FileInputStream(outputDocument); // load document doc = PDDocument.load(document); // create signature dictionary PDSignature signature = new PDSignature(); signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter // subfilter for basic and PAdES Part 2 signatures signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED); if (model.getSignerName() != null) { signature.setName(model.getSignerName()); } if (model.getSignerLocation() != null) { signature.setLocation(model.getSignerLocation()); } if (model.getReasonForSigning() != null) { signature.setReason(model.getReasonForSigning()); } // the signing date, needed for valid signature Calendar sigDate = Calendar.getInstance(); sigDate.setTime(new Date(model.getSigningAndIdTime())); signature.setSignDate(sigDate); // register signature dictionary and sign interface if (model.getOptions() == null) { doc.addSignature(signature, this); } else { doc.addSignature(signature, this, model.getOptions()); } // write incremental (only for signing purpose) PdfBoxSigUtil.saveIncremental(doc, fis, fos, model.getSigningAndIdTime()); return outputDocument; }
From source file:com.ackpdfbox.app.Encrypt.java
License:Apache License
private void encrypt(String[] args) throws IOException, CertificateException { if (args.length < 1) { usage();//from w ww . ja v a 2s. c o m } else { AccessPermission ap = new AccessPermission(); String infile = null; String outfile = null; String certFile = null; String userPassword = ""; String ownerPassword = ""; int keyLength = 40; PDDocument document = null; try { for (int i = 0; i < args.length; i++) { String key = args[i]; if (key.equals("-O")) { ownerPassword = args[++i]; } else if (key.equals("-U")) { userPassword = args[++i]; } else if (key.equals("-canAssemble")) { ap.setCanAssembleDocument(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canExtractContent")) { ap.setCanExtractContent(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canExtractForAccessibility")) { ap.setCanExtractForAccessibility(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canFillInForm")) { ap.setCanFillInForm(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canModify")) { ap.setCanModify(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canModifyAnnotations")) { ap.setCanModifyAnnotations(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canPrint")) { ap.setCanPrint(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-canPrintDegraded")) { ap.setCanPrintDegraded(args[++i].equalsIgnoreCase("true")); } else if (key.equals("-certFile")) { certFile = args[++i]; } else if (key.equals("-keyLength")) { try { keyLength = Integer.parseInt(args[++i]); } catch (NumberFormatException e) { throw new NumberFormatException( "Error: -keyLength is not an integer '" + args[i] + "'"); } } else if (infile == null) { infile = key; } else if (outfile == null) { outfile = key; } else { usage(); } } if (infile == null) { usage(); } if (outfile == null) { outfile = infile; } document = PDDocument.load(new File(infile)); if (!document.isEncrypted()) { if (certFile != null) { PublicKeyProtectionPolicy ppp = new 
PublicKeyProtectionPolicy(); PublicKeyRecipient recip = new PublicKeyRecipient(); recip.setPermission(ap); CertificateFactory cf = CertificateFactory.getInstance("X.509"); InputStream inStream = null; try { inStream = new FileInputStream(certFile); X509Certificate certificate = (X509Certificate) cf.generateCertificate(inStream); recip.setX509(certificate); } finally { if (inStream != null) { inStream.close(); } } ppp.addRecipient(recip); ppp.setEncryptionKeyLength(keyLength); document.protect(ppp); } else { StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerPassword, userPassword, ap); spp.setEncryptionKeyLength(keyLength); document.protect(spp); } document.save(outfile); } else { System.err.println("Error: Document is already encrypted."); } } finally { if (document != null) { document.close(); } } } }
From source file:com.amolik.misc.ExtractTextByArea.java
License:Apache License
/** * This will print the documents text in a certain area. * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. *///from w w w . ja v a 2 s . co m public static void main(String[] args) throws IOException { //args[0]= "E:\\Automation\\uphillit\\Fiscal_demo_data.pdf"; // if( args.length != 1 ) // { // usage(); // } // else // { PDDocument document = null; try { document = PDDocument.load(new File("E:\\Automation\\uphillit\\Fiscal_demo_data.pdf")); int numberOfPages = document.getNumberOfPages(); if (numberOfPages > 0) { PDPage page = (PDPage) document.getPages().get(0); System.out.println(page.getContents()); } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); Rectangle rect = new Rectangle(3, 1, 600, 6000); stripper.addRegion("class1", rect); PDPage firstPage = document.getPage(0); stripper.extractRegions(firstPage); System.out.println("Text in the area:" + rect); System.out.println(stripper.getTextForRegion("class1")); } finally { if (document != null) { document.close(); } } // } }
From source file:com.apache.pdfbox.ocr.tesseract.BadScan.java
License:Apache License
@Test public void textBadScan() { try {//from www. j av a 2 s.c om PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf")); PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImage(0, 3); TessBaseAPI api = new TessBaseAPI(); boolean init = api.init("eng"); api.setBufferedImage(image); String text = api.getUTF8Text(); System.out.println(text); api.end(); assertEquals(init, true); document.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
/**
 * Renders the first page of {@code scansmpl.pdf}, passes the image to the
 * Tesseract wrapper, prints the recognised text and checks that
 * {@code init("eng")} reported success.
 */
@Test
public void textBadScan() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                String text = api.getUTF8Text();
                System.out.println(text);
            } finally {
                // Release the OCR handle even when recognition fails.
                api.end();
            }
            // Check the real initialisation result; the previous assertion
            // compared two constants and could never fail.
            assertEquals(true, init);
        } finally {
            document.close();
        }
    } catch (IOException e) {
        // An I/O problem must fail the test instead of being printed and ignored.
        throw new AssertionError("Could not load or render the sample PDF", e);
    }
}
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
@Test public void testBufferedImage() { try {//from w ww .j ava 2s. c om PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf1.pdf")); PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImage(0, 3); TessBaseAPI api = new TessBaseAPI(); boolean init = api.init("eng"); api.setBufferedImage(image); String text = api.getUTF8Text(); System.out.println(text); api.end(); assertEquals(init, true); document.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
@Test public void testByteStream() { try {//from ww w. j a v a 2s . c om PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf2.pdf")); PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImage(0, 3); int width = image.getWidth(); int height = image.getHeight(); int bpp = 3; int bpl = width * 3; TessBaseAPI api = new TessBaseAPI(); byte data[] = api.getByteStream(image); boolean init = api.init("eng"); api.setImage(data, width, height, bpp, bpl); String text = api.getUTF8Text(); System.out.println(text); api.end(); assertEquals(init, true); document.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java
License:Apache License
/**
 * Renders the first page of {@code pdf3.pdf}, passes the image to the
 * Tesseract wrapper, then walks the result iterator printing each trimmed
 * word and its bounding box, and checks that {@code init("eng")} reported
 * success.
 */
@Test
public void testIterator() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf3.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                api.getResultIterator();
                if (api.isResultIteratorAvailable()) {
                    do {
                        System.out.println(api.getWord().trim());
                        String result = api.getBoundingBox();
                        System.out.println(result);
                    } while (api.resultIteratorNext());
                }
            } finally {
                // Release the OCR handle even when iteration fails.
                api.end();
            }
            assertEquals(true, init);
        } finally {
            document.close();
        }
    } catch (IOException e) {
        // An I/O problem must fail the test instead of being printed and ignored.
        throw new AssertionError("Could not load or render the sample PDF", e);
    }
}
From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java
/** * ? ? pdf/*ww w .j a v a 2s .com*/ * * @param fileName ? * @return ?? ? pdf * @throws IOException */ private ArrayList<String> getListOfWordsFromPDF(String fileName) throws IOException { List<String> result = new ArrayList<String>(); PDDocument pdf = PDDocument.load(fileName); PDFTextStripper reader = new PDFTextStripper(); StringBuilder builder = new StringBuilder(); builder.append(reader.getText(pdf)); String[] words = builder.toString().split(" "); for (String s : words) { result.add(s); } return (ArrayList<String>) result; }
From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java
/**
 * Extracts the text of a file and splits it into words.
 *
 * <p>NOTE(review): despite the "Txt" in its name, this method parses the
 * file as a PDF via {@code PDDocument.load}. This looks like a copy-paste
 * slip; confirm the intended input format against the callers.
 *
 * @param fileName path of the file to read (currently parsed as a PDF)
 * @return the extracted text split on single space characters
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
private ArrayList<String> getListOfWordsFromTxt(String fileName) throws IOException {
    PDDocument pdf = PDDocument.load(fileName);
    try {
        PDFTextStripper reader = new PDFTextStripper();
        String text = reader.getText(pdf);

        ArrayList<String> result = new ArrayList<String>();
        for (String word : text.split(" ")) {
            result.add(word);
        }
        return result;
    } finally {
        // The document was previously never closed.
        pdf.close();
    }
}