Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.load.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:com.aaasec.sigserv.csspsupport.pdfbox.ReplaceSignature.java

License:EUPL

/**
 * Copies the original PDF to the signed-document location and appends a
 * signature to that copy via an incremental save.
 *
 * <p>The file at {@code model.getOriginalLoc()} is copied byte-for-byte to
 * {@code model.getSignedLoc()}. A signature dictionary is then filled from the
 * model (name, location, reason, signing time), registered on the loaded
 * document together with this object as the signing interface, and
 * {@code PdfBoxSigUtil.saveIncremental} is invoked with a read stream on the
 * copy and the still-open write stream that produced it.
 *
 * @return the output file located at {@code model.getSignedLoc()}
 * @throws IOException if the original cannot be read, the copy cannot be
 *                     written, or the document cannot be loaded
 * @throws COSVisitorException if raised while registering the signature or saving
 * @throws SignatureException if raised while registering the signature or saving
 * @throws RuntimeException if the original document does not exist
 */
public File resignPDF() throws IOException, COSVisitorException, SignatureException {
    File document = new File(model.getOriginalLoc());
    if (!document.exists()) {
        throw new RuntimeException("Document for signing does not exist");
    }

    File outputDocument = new File(model.getSignedLoc());

    FileOutputStream fos = null;
    FileInputStream fis = null;
    try {
        // Copy the original into the output file. The input is opened first so
        // that an unreadable original does not truncate an existing output file.
        FileInputStream source = new FileInputStream(document);
        try {
            fos = new FileOutputStream(outputDocument);
            byte[] buffer = new byte[8 * 1024];
            int c;
            while ((c = source.read(buffer)) != -1) {
                fos.write(buffer, 0, c);
            }
        } finally {
            source.close();
        }

        // The incremental save reads the copied bytes back and appends to fos,
        // so fos must stay open until saveIncremental has returned.
        fis = new FileInputStream(outputDocument);

        // load document
        // NOTE(review): 'doc' is not declared in this method (presumably a field
        // of the enclosing class) and is not closed here - confirm who owns it.
        doc = PDDocument.load(document);

        // create signature dictionary
        PDSignature signature = new PDSignature();
        signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter
        // subfilter for basic and PAdES Part 2 signatures
        signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
        if (model.getSignerName() != null) {
            signature.setName(model.getSignerName());
        }
        if (model.getSignerLocation() != null) {
            signature.setLocation(model.getSignerLocation());
        }
        if (model.getReasonForSigning() != null) {
            signature.setReason(model.getReasonForSigning());
        }

        // the signing date, needed for valid signature
        Calendar sigDate = Calendar.getInstance();
        sigDate.setTime(new Date(model.getSigningAndIdTime()));
        signature.setSignDate(sigDate);

        // register signature dictionary and sign interface
        if (model.getOptions() == null) {
            doc.addSignature(signature, this);
        } else {
            doc.addSignature(signature, this, model.getOptions());
        }

        // write incremental (only for signing purpose)
        PdfBoxSigUtil.saveIncremental(doc, fis, fos, model.getSigningAndIdTime());
    } finally {
        // Release both streams on every path; closing an already-closed file
        // stream is harmless should saveIncremental have closed them itself.
        try {
            if (fis != null) {
                fis.close();
            }
        } finally {
            if (fos != null) {
                fos.close();
            }
        }
    }

    return outputDocument;
}

From source file:com.ackpdfbox.app.Encrypt.java

License:Apache License

/**
 * Parses the command line and encrypts a PDF accordingly.
 *
 * <p>Recognised options, each followed by a value: {@code -O} (owner password),
 * {@code -U} (user password), {@code -certFile}, {@code -keyLength} (default 40)
 * and the {@code -can...} permission switches, whose value is compared
 * case-insensitively with {@code "true"}. The first argument that is not an
 * option is taken as the input file and the second as the output file; when
 * no output file is given the input file is overwritten. A third such
 * argument triggers {@code usage()}.
 *
 * <p>With a certificate file the document is protected with a public-key
 * policy, otherwise with a standard password policy. A document that is
 * already encrypted is left untouched and an error is printed to stderr.
 *
 * @param args the command line arguments
 * @throws IOException if the document or the certificate file cannot be read,
 *                     or the result cannot be saved
 * @throws CertificateException if the X.509 certificate cannot be created
 * @throws NumberFormatException if the value given for {@code -keyLength} is
 *                               not an integer
 */
private void encrypt(String[] args) throws IOException, CertificateException {
    if (args.length < 1) {
        usage();
    } else {
        AccessPermission ap = new AccessPermission();

        String infile = null;
        String outfile = null;
        String certFile = null;
        String userPassword = "";
        String ownerPassword = "";

        int keyLength = 40;

        PDDocument document = null;

        try {
            for (int i = 0; i < args.length; i++) {
                String key = args[i];
                if (key.equals("-O")) {
                    ownerPassword = args[++i];
                } else if (key.equals("-U")) {
                    userPassword = args[++i];
                } else if (key.equals("-canAssemble")) {
                    ap.setCanAssembleDocument(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canExtractContent")) {
                    ap.setCanExtractContent(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canExtractForAccessibility")) {
                    ap.setCanExtractForAccessibility(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canFillInForm")) {
                    ap.setCanFillInForm(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canModify")) {
                    ap.setCanModify(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canModifyAnnotations")) {
                    ap.setCanModifyAnnotations(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canPrint")) {
                    ap.setCanPrint(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canPrintDegraded")) {
                    ap.setCanPrintDegraded(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-certFile")) {
                    certFile = args[++i];
                } else if (key.equals("-keyLength")) {
                    try {
                        keyLength = Integer.parseInt(args[++i]);
                    } catch (NumberFormatException e) {
                        // Re-throw with a user-facing message but keep the
                        // original exception as the cause for diagnostics.
                        NumberFormatException wrapped = new NumberFormatException(
                                "Error: -keyLength is not an integer '" + args[i] + "'");
                        wrapped.initCause(e);
                        throw wrapped;
                    }
                } else if (infile == null) {
                    infile = key;
                } else if (outfile == null) {
                    outfile = key;
                } else {
                    usage();
                }
            }
            if (infile == null) {
                usage();
                // usage() is defined elsewhere; should it return instead of
                // terminating, stop here rather than fail on new File(null).
                return;
            }
            if (outfile == null) {
                // No explicit output file: overwrite the input file.
                outfile = infile;
            }
            document = PDDocument.load(new File(infile));

            if (!document.isEncrypted()) {
                if (certFile != null) {
                    PublicKeyProtectionPolicy ppp = new PublicKeyProtectionPolicy();
                    PublicKeyRecipient recip = new PublicKeyRecipient();
                    recip.setPermission(ap);

                    CertificateFactory cf = CertificateFactory.getInstance("X.509");

                    InputStream inStream = null;
                    try {
                        inStream = new FileInputStream(certFile);
                        X509Certificate certificate = (X509Certificate) cf.generateCertificate(inStream);
                        recip.setX509(certificate);
                    } finally {
                        if (inStream != null) {
                            inStream.close();
                        }
                    }

                    ppp.addRecipient(recip);

                    ppp.setEncryptionKeyLength(keyLength);

                    document.protect(ppp);
                } else {
                    StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerPassword, userPassword,
                            ap);
                    spp.setEncryptionKeyLength(keyLength);
                    document.protect(spp);
                }
                document.save(outfile);
            } else {
                System.err.println("Error: Document is already encrypted.");
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:com.amolik.misc.ExtractTextByArea.java

License:Apache License

/**
 * Prints the text found inside a fixed rectangular area of the first page of
 * a PDF whose path is hard-coded below.
 *
 * @param args The command line arguments (not read by this method).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    final String regionName = "class1";
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(new File("E:\\Automation\\uphillit\\Fiscal_demo_data.pdf"));

        // When the document has pages, print what getContents() returns for page 0.
        if (pdf.getNumberOfPages() > 0) {
            PDPage pageZero = (PDPage) pdf.getPages().get(0);
            System.out.println(pageZero.getContents());
        }

        // Register a single named region and extract its text from the first page.
        PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
        areaStripper.setSortByPosition(true);
        Rectangle area = new Rectangle(3, 1, 600, 6000);
        areaStripper.addRegion(regionName, area);
        areaStripper.extractRegions(pdf.getPage(0));

        System.out.println("Text in the area:" + area);
        System.out.println(areaStripper.getTextForRegion(regionName));
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.BadScan.java

License:Apache License

/**
 * Renders page 0 of the sample scan, feeds the image to the Tesseract API,
 * prints the recognised text and asserts that {@code api.init("eng")}
 * returned {@code true}.
 */
@Test
public void textBadScan() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                String text = api.getUTF8Text();
                System.out.println(text);
            } finally {
                // Always release the OCR engine, even if recognition fails.
                api.end();
            }
            assertEquals(true, init);
        } finally {
            // Close the PDF on every path, including assertion failures.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test instead of printing the stack trace and passing.
        throw new RuntimeException("Could not load or render the sample PDF", e);
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of the sample scan, feeds the image to the Tesseract API,
 * prints the recognised text and asserts that {@code api.init("eng")}
 * returned {@code true}.
 */
@Test
public void textBadScan() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                String text = api.getUTF8Text();
                System.out.println(text);
            } finally {
                // Always release the OCR engine, even if recognition fails.
                api.end();
            }
            // Check the actual initialisation result; the previous
            // assertEquals(true, true) could never fail.
            assertEquals(true, init);
        } finally {
            // Close the PDF on every path, including assertion failures.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test instead of printing the stack trace and passing.
        throw new RuntimeException("Could not load or render the sample PDF", e);
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of {@code pdf1.pdf}, hands the resulting
 * {@code BufferedImage} to the Tesseract API, prints the recognised text and
 * asserts that {@code api.init("eng")} returned {@code true}.
 */
@Test
public void testBufferedImage() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf1.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                String text = api.getUTF8Text();
                System.out.println(text);
            } finally {
                // Always release the OCR engine, even if recognition fails.
                api.end();
            }
            assertEquals(true, init);
        } finally {
            // Close the PDF on every path, including assertion failures.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test instead of printing the stack trace and passing.
        throw new RuntimeException("Could not load or render the sample PDF", e);
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of {@code pdf2.pdf}, converts the image to a byte array via
 * {@code api.getByteStream}, passes it to the Tesseract API with explicit
 * dimensions, prints the recognised text and asserts that
 * {@code api.init("eng")} returned {@code true}.
 */
@Test
public void testByteStream() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf2.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);
            int width = image.getWidth();
            int height = image.getHeight();
            int bpp = 3;
            // Bytes per line derived from bpp so the two values cannot drift apart.
            int bpl = width * bpp;

            TessBaseAPI api = new TessBaseAPI();
            byte[] data = api.getByteStream(image);
            boolean init = api.init("eng");
            try {
                api.setImage(data, width, height, bpp, bpl);
                String text = api.getUTF8Text();
                System.out.println(text);
            } finally {
                // Always release the OCR engine, even if recognition fails.
                api.end();
            }
            assertEquals(true, init);
        } finally {
            // Close the PDF on every path, including assertion failures.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test instead of printing the stack trace and passing.
        throw new RuntimeException("Could not load or render the sample PDF", e);
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of {@code pdf3.pdf}, runs it through the Tesseract API and,
 * when a result iterator is available, prints each word followed by its
 * bounding box. Finally asserts that {@code api.init("eng")} returned
 * {@code true}.
 */
@Test
public void testIterator() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf3.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            try {
                api.setBufferedImage(image);
                api.getResultIterator();
                if (api.isResultIteratorAvailable()) {
                    do {
                        System.out.println(api.getWord().trim());
                        String result = api.getBoundingBox();
                        System.out.println(result);
                    } while (api.resultIteratorNext());
                }
            } finally {
                // Always release the OCR engine, even if iteration fails.
                api.end();
            }
            assertEquals(true, init);
        } finally {
            // Close the PDF on every path, including assertion failures.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test instead of printing the stack trace and passing.
        throw new RuntimeException("Could not load or render the sample PDF", e);
    }
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 *  ? ?  pdf/*ww  w .j  a v  a  2s .com*/
 *
 * @param fileName ? 
 * @return ?? ?  pdf
 * @throws IOException
 */
private ArrayList<String> getListOfWordsFromPDF(String fileName) throws IOException {
    List<String> result = new ArrayList<String>();
    PDDocument pdf = PDDocument.load(fileName);
    PDFTextStripper reader = new PDFTextStripper();
    StringBuilder builder = new StringBuilder();
    builder.append(reader.getText(pdf));
    String[] words = builder.toString().split(" ");
    for (String s : words) {
        result.add(s);
    }
    return (ArrayList<String>) result;
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 * Extracts the text of a file and splits it into tokens on single spaces.
 *
 * <p>NOTE(review): despite the "Txt" in its name, this method loads the file
 * with {@code PDDocument.load} and {@code PDFTextStripper}, i.e. it parses the
 * input as a PDF - confirm whether plain-text reading was intended.
 *
 * @param fileName path of the file to read
 * @return the tokens of the extracted text, in the order they appear
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
private ArrayList<String> getListOfWordsFromTxt(String fileName) throws IOException {
    PDDocument pdf = PDDocument.load(fileName);
    try {
        PDFTextStripper reader = new PDFTextStripper();
        String text = reader.getText(pdf);

        ArrayList<String> result = new ArrayList<String>();
        // Only a single space separates tokens; line breaks and repeated
        // spaces are not collapsed, so tokens may contain newlines or be empty.
        for (String word : text.split(" ")) {
            result.add(word);
        }
        return result;
    } finally {
        // Release the document on every path.
        pdf.close();
    }
}