Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find usage examples for the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:com.aaasec.sigserv.csspsupport.pdfbox.ReplaceSignature.java

License:EUPL

/**
 * Copies the original PDF to the signed-document location and appends a signature to
 * that copy as an incremental save.
 *
 * <p>The source path is read from {@code model.getOriginalLoc()} and the target path from
 * {@code model.getSignedLoc()}. Optional signer name, location and reason are taken from
 * the model when present; the signing date comes from {@code model.getSigningAndIdTime()}.
 *
 * @return the file at the signed-document location
 * @throws IOException if copying or loading the document fails
 * @throws COSVisitorException declared by the original signature; raised by the PDF library
 * @throws SignatureException declared by the original signature; raised while signing
 * @throws RuntimeException if the original document does not exist
 */
public File resignPDF() throws IOException, COSVisitorException, SignatureException {
    File document = new File(model.getOriginalLoc());
    if (!document.exists()) {
        throw new RuntimeException("Document for signing does not exist");
    }

    File outputDocument = new File(model.getSignedLoc());
    FileInputStream fis = null;
    FileOutputStream fos = null;
    try {
        // Copy the original byte-for-byte; the incremental save appends to this copy.
        fis = new FileInputStream(document);
        fos = new FileOutputStream(outputDocument);
        byte[] buffer = new byte[8 * 1024];
        int c;
        while ((c = fis.read(buffer)) != -1) {
            fos.write(buffer, 0, c);
        }
        fis.close();
        // Re-open on the copy: this stream is handed to the incremental save below.
        fis = new FileInputStream(outputDocument);

        // load document
        // NOTE(review): doc is a field that is not closed here; its lifetime is managed
        // outside this method - confirm that it is released somewhere.
        doc = PDDocument.load(document);

        // create signature dictionary
        PDSignature signature = new PDSignature();
        signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter
        // subfilter for basic and PAdES Part 2 signatures
        signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
        if (model.getSignerName() != null) {
            signature.setName(model.getSignerName());
        }
        if (model.getSignerLocation() != null) {
            signature.setLocation(model.getSignerLocation());
        }
        if (model.getReasonForSigning() != null) {
            signature.setReason(model.getReasonForSigning());
        }

        // the signing date, needed for valid signature
        Calendar sigDate = Calendar.getInstance();
        sigDate.setTime(new Date(model.getSigningAndIdTime()));
        signature.setSignDate(sigDate);

        // register signature dictionary and sign interface
        if (model.getOptions() == null) {
            doc.addSignature(signature, this);
        } else {
            doc.addSignature(signature, this, model.getOptions());
        }

        // write incremental (only for signing purpose)
        PdfBoxSigUtil.saveIncremental(doc, fis, fos, model.getSigningAndIdTime());
    } finally {
        // Release both streams on every path; closing an already-closed stream is harmless.
        try {
            if (fis != null) {
                fis.close();
            }
        } finally {
            if (fos != null) {
                fos.close();
            }
        }
    }

    return outputDocument;
}

From source file:com.ackpdfbox.app.Encrypt.java

License:Apache License

/**
 * Parses the command-line arguments and writes an encrypted copy of the input PDF.
 *
 * <p>Recognised options are {@code -O}, {@code -U}, the {@code -can*} permission flags,
 * {@code -certFile} and {@code -keyLength}; each consumes the following argument as its
 * value. The first two remaining arguments are the input and output file. When no output
 * file is given the input file is overwritten. If a certificate file is supplied the
 * document is protected with a public-key policy, otherwise with the owner/user passwords.
 * A document that is already encrypted is left untouched and an error is printed.
 *
 * @param args the command-line arguments
 * @throws IOException if the document or certificate file cannot be read or written
 * @throws CertificateException if the certificate cannot be parsed
 * @throws NumberFormatException if the {@code -keyLength} value is not an integer
 */
private void encrypt(String[] args) throws IOException, CertificateException {
    if (args.length < 1) {
        usage();
    } else {
        AccessPermission ap = new AccessPermission();

        String infile = null;
        String outfile = null;
        String certFile = null;
        String userPassword = "";
        String ownerPassword = "";

        int keyLength = 40;

        PDDocument document = null;

        try {
            for (int i = 0; i < args.length; i++) {
                String key = args[i];
                if (key.equals("-O")) {
                    ownerPassword = args[++i];
                } else if (key.equals("-U")) {
                    userPassword = args[++i];
                } else if (key.equals("-canAssemble")) {
                    ap.setCanAssembleDocument(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canExtractContent")) {
                    ap.setCanExtractContent(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canExtractForAccessibility")) {
                    ap.setCanExtractForAccessibility(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canFillInForm")) {
                    ap.setCanFillInForm(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canModify")) {
                    ap.setCanModify(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canModifyAnnotations")) {
                    ap.setCanModifyAnnotations(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canPrint")) {
                    ap.setCanPrint(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-canPrintDegraded")) {
                    ap.setCanPrintDegraded(args[++i].equalsIgnoreCase("true"));
                } else if (key.equals("-certFile")) {
                    certFile = args[++i];
                } else if (key.equals("-keyLength")) {
                    try {
                        keyLength = Integer.parseInt(args[++i]);
                    } catch (NumberFormatException e) {
                        // Same exception type and message as before, but keep the parser's
                        // exception attached as the cause for diagnostics.
                        NumberFormatException wrapped = new NumberFormatException(
                                "Error: -keyLength is not an integer '" + args[i] + "'");
                        wrapped.initCause(e);
                        throw wrapped;
                    }
                } else if (infile == null) {
                    infile = key;
                } else if (outfile == null) {
                    outfile = key;
                } else {
                    usage();
                }
            }
            if (infile == null) {
                usage();
                // If usage() returns normally, stop here so that a null path is not
                // passed on to new File(...).
                return;
            }
            if (outfile == null) {
                // No explicit output: encrypt in place.
                outfile = infile;
            }
            document = PDDocument.load(new File(infile));

            if (!document.isEncrypted()) {
                if (certFile != null) {
                    PublicKeyProtectionPolicy ppp = new PublicKeyProtectionPolicy();
                    PublicKeyRecipient recip = new PublicKeyRecipient();
                    recip.setPermission(ap);

                    CertificateFactory cf = CertificateFactory.getInstance("X.509");

                    InputStream inStream = null;
                    try {
                        inStream = new FileInputStream(certFile);
                        X509Certificate certificate = (X509Certificate) cf.generateCertificate(inStream);
                        recip.setX509(certificate);
                    } finally {
                        if (inStream != null) {
                            inStream.close();
                        }
                    }

                    ppp.addRecipient(recip);

                    ppp.setEncryptionKeyLength(keyLength);

                    document.protect(ppp);
                } else {
                    StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerPassword, userPassword,
                            ap);
                    spp.setEncryptionKeyLength(keyLength);
                    document.protect(spp);
                }
                document.save(outfile);
            } else {
                System.err.println("Error: Document is already encrypted.");
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:com.amolik.misc.ExtractTextByArea.java

License:Apache License

/**
 * Prints the content reference of the first page of a PDF and the text found inside a
 * fixed rectangular region of that page.
 *
 * @param args The command line arguments (not read; the input path is hard-coded below).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    PDDocument document = null;
    try {
        // NOTE(review): hard-coded input path; consider taking it from args.
        document = PDDocument.load(new File("E:\\Automation\\uphillit\\Fiscal_demo_data.pdf"));
        if (document.getNumberOfPages() == 0) {
            // The original only guarded the first print and then asked for page 0 anyway.
            System.err.println("Error: Document has no pages.");
            return;
        }

        // Look the page up once and reuse it for both the dump and the extraction.
        PDPage firstPage = document.getPage(0);
        System.out.println(firstPage.getContents());

        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);
        Rectangle rect = new Rectangle(3, 1, 600, 6000);
        stripper.addRegion("class1", rect);
        stripper.extractRegions(firstPage);
        System.out.println("Text in the area:" + rect);
        System.out.println(stripper.getTextForRegion("class1"));
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.BadScan.java

License:Apache License

/**
 * Renders page 0 of the sample scan, feeds the image to the OCR API, prints the
 * recognised text and checks that the API initialised.
 */
@Test
public void textBadScan() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            api.setBufferedImage(image);
            String text = api.getUTF8Text();
            System.out.println(text);
            api.end();
            // expected value first, actual second
            assertEquals(true, init);
        } finally {
            // Close the document even when rendering, OCR or the assertion fails.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test; printing the stack trace would let it pass silently.
        AssertionError failure = new AssertionError("I/O failure while running OCR test: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of the sample scan, feeds the image to the OCR API, prints the
 * recognised text and checks that the API initialised.
 */
@Test
public void textBadScan() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/scansmpl.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            api.setBufferedImage(image);
            String text = api.getUTF8Text();
            System.out.println(text);
            api.end();
            // The original compared true with true, which can never fail; check the
            // initialisation result as the sibling tests do.
            assertEquals(true, init);
        } finally {
            // Close the document even when rendering, OCR or the assertion fails.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test; printing the stack trace would let it pass silently.
        AssertionError failure = new AssertionError("I/O failure while running OCR test: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of a sample PDF to a {@link BufferedImage}, passes it to the OCR API
 * via {@code setBufferedImage}, prints the recognised text and checks that the API
 * initialised.
 */
@Test
public void testBufferedImage() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf1.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);

            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            api.setBufferedImage(image);
            String text = api.getUTF8Text();
            System.out.println(text);
            api.end();
            // expected value first, actual second
            assertEquals(true, init);
        } finally {
            // Close the document even when rendering, OCR or the assertion fails.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test; printing the stack trace would let it pass silently.
        AssertionError failure = new AssertionError("I/O failure while running OCR test: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of a sample PDF, converts the image to a byte array with
 * {@code getByteStream}, passes the raw bytes to the OCR API via {@code setImage},
 * prints the recognised text and checks that the API initialised.
 */
@Test
public void testByteStream() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf2.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);
            int width = image.getWidth();
            int height = image.getHeight();
            int bpp = 3;
            // Derived from bpp instead of repeating the literal 3.
            // NOTE(review): presumably bytes per pixel / bytes per line - confirm against setImage.
            int bpl = width * bpp;

            TessBaseAPI api = new TessBaseAPI();
            byte data[] = api.getByteStream(image);
            boolean init = api.init("eng");
            api.setImage(data, width, height, bpp, bpl);
            String text = api.getUTF8Text();
            System.out.println(text);
            api.end();
            // expected value first, actual second
            assertEquals(true, init);
        } finally {
            // Close the document even when rendering, OCR or the assertion fails.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test; printing the stack trace would let it pass silently.
        AssertionError failure = new AssertionError("I/O failure while running OCR test: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:com.apache.pdfbox.ocr.tesseract.TessBaseAPITest.java

License:Apache License

/**
 * Renders page 0 of a sample PDF, runs OCR on it and, when a result iterator is
 * available, prints every word together with its bounding box. Finally checks that the
 * API initialised.
 */
@Test
public void testIterator() {
    try {
        PDDocument document = PDDocument.load(new File("src/test/resources/samples/pdf3.pdf"));
        try {
            PDFRenderer renderer = new PDFRenderer(document);
            BufferedImage image = renderer.renderImage(0, 3);
            TessBaseAPI api = new TessBaseAPI();
            boolean init = api.init("eng");
            api.setBufferedImage(image);
            api.getResultIterator();
            if (api.isResultIteratorAvailable()) {
                do {
                    System.out.println(api.getWord().trim());
                    String result = api.getBoundingBox();
                    System.out.println(result);
                } while (api.resultIteratorNext());
            }
            api.end();
            // expected value first, actual second
            assertEquals(true, init);
        } finally {
            // Close the document even when rendering, OCR or the assertion fails.
            document.close();
        }
    } catch (IOException e) {
        // Fail the test; printing the stack trace would let it pass silently.
        AssertionError failure = new AssertionError("I/O failure while running OCR test: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 *  ? ?  pdf/*ww  w .j  a v  a  2s .com*/
 *
 * @param fileName ? 
 * @return ?? ?  pdf
 * @throws IOException
 */
private ArrayList<String> getListOfWordsFromPDF(String fileName) throws IOException {
    List<String> result = new ArrayList<String>();
    PDDocument pdf = PDDocument.load(fileName);
    PDFTextStripper reader = new PDFTextStripper();
    StringBuilder builder = new StringBuilder();
    builder.append(reader.getText(pdf));
    String[] words = builder.toString().split(" ");
    for (String s : words) {
        result.add(s);
    }
    return (ArrayList<String>) result;
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 * Extracts the text of a file and splits it into words.
 *
 * <p>NOTE(review): despite the "Txt" in its name, this method loads the file with
 * {@code PDDocument.load}, i.e. it parses it as a PDF exactly like
 * {@code getListOfWordsFromPDF} - confirm whether plain-text input was intended.
 *
 * @param fileName path of the file to load
 * @return the extracted text split on single space characters
 * @throws IOException if the document cannot be loaded or its text cannot be extracted
 */
private ArrayList<String> getListOfWordsFromTxt(String fileName) throws IOException {
    ArrayList<String> result = new ArrayList<String>();
    PDDocument pdf = PDDocument.load(fileName);
    try {
        PDFTextStripper reader = new PDFTextStripper();
        String[] words = reader.getText(pdf).split(" ");
        for (String s : words) {
            result.add(s);
        }
    } finally {
        // The document was previously never closed.
        pdf.close();
    }
    return result;
}