Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

This page collects example usages of org.apache.pdfbox.pdmodel.PDDocument.load, taken from open-source projects.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:org.geoserver.wms.map.PDFGetMapTest.java

License:Open Source License

/**
 * Runs the first page of the given PDF through a graphics stream engine and reports the tiling
 * pattern that was most recently selected as non-stroking color while doing so. Meant for
 * documents that contain a single tiling pattern which is actually used to paint shapes.
 *
 * @param pdfDocument the raw bytes of the PDF document to inspect
 * @return the last tiling pattern selected as non-stroking color on the first page, or
 *     {@code null} when no such pattern was encountered
 * @throws InvalidPasswordException declared for the document load step
 * @throws IOException if loading the document or processing the page fails
 */
PDTilingPattern getTilingPattern(byte[] pdfDocument) throws InvalidPasswordException, IOException {
    // PDFBox does the parsing here: iText handles text and image extraction well, but several
    // hours of attempts at reading tiling patterns with it gave no results.
    // NOTE(review): the loaded document is never closed in this method — confirm whether the
    // returned pattern needs the document to stay open before adding a close().
    PDDocument document = PDDocument.load(pdfDocument);
    PDPage firstPage = document.getPage(0);

    // Holder written from inside the anonymous operator below.
    final AtomicReference<PDTilingPattern> lastTilingPattern = new AtomicReference<>();

    // A graphics stream engine is the only facility found that parses the PDF deeply enough to
    // expose the tiling pattern in parsed form. Every drawing callback is a deliberate no-op.
    PDFStreamEngine streamEngine = new PDFGraphicsStreamEngine(firstPage) {

        @Override
        public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
        }

        @Override
        public void moveTo(float x, float y) throws IOException {
        }

        @Override
        public void lineTo(float x, float y) throws IOException {
        }

        @Override
        public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
        }

        @Override
        public Point2D getCurrentPoint() throws IOException {
            return null;
        }

        @Override
        public void closePath() throws IOException {
        }

        @Override
        public void endPath() throws IOException {
        }

        @Override
        public void clip(int windingRule) throws IOException {
        }

        @Override
        public void strokePath() throws IOException {
        }

        @Override
        public void fillPath(int windingRule) throws IOException {
        }

        @Override
        public void fillAndStrokePath(int windingRule) throws IOException {
        }

        @Override
        public void shadingFill(COSName shadingName) throws IOException {
        }

        @Override
        public void drawImage(PDImage pdImage) throws IOException {
        }
    };

    // The trap: after the stock operator has updated the graphics state, look at the
    // non-stroking color space and remember the pattern if it is a tiling one.
    streamEngine.addOperator(new SetNonStrokingColorN() {

        @Override
        public void process(Operator operator, List<COSBase> arguments) throws IOException {
            super.process(operator, arguments);

            PDColor currentColor = context.getGraphicsState().getNonStrokingColor();
            Object colorSpace = context.getGraphicsState().getNonStrokingColorSpace();
            if (!(colorSpace instanceof PDPattern)) {
                return;
            }

            PDAbstractPattern candidate = ((PDPattern) colorSpace).getPattern(currentColor);
            if (candidate instanceof PDTilingPattern) {
                lastTilingPattern.set((PDTilingPattern) candidate);
            }
        }
    });

    // Process the page so the trap gets a chance to fire.
    streamEngine.processPage(firstPage);

    return lastTilingPattern.get();
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Delegates loading to the superclass, then checks that the bytes held in {@code content} can
 * be parsed by PDFBox as a PDF document.
 *
 * @param inputStream the stream handed to the superclass {@code load}
 * @throws IOException if the superclass load fails, or with the message
 *     "PDF document is not valid" (and the parsing failure as its cause) when PDFBox cannot
 *     parse the content
 */
@Override
public void load(InputStream inputStream) throws IOException {
    super.load(inputStream);

    // check that the file is a PDF
    ByteArrayInputStream bais = null;
    PDDocument document = null;

    try {

        bais = new ByteArrayInputStream(content);
        document = PDDocument.load(bais);

    } catch (Exception e) {
        // chain the parsing failure so callers can see why the document was rejected
        throw new IOException("PDF document is not valid", e);
    } finally {
        // the parsed document is only needed for validation, release it right away
        if (document != null) {
            document.close();
        }
        IOUtils.closeQuietly(bais);
    }
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Counts the pages of the PDF held in {@code content}.
 *
 * @return the number of pages reported by PDFBox, or 0 when there is no content
 * @throws DocumentException wrapping any failure raised while loading or inspecting the PDF
 */
public int getPageCount() throws DocumentException {

    // nothing loaded yet: report an empty document
    if (content == null) {
        return 0;
    }

    ByteArrayInputStream source = new ByteArrayInputStream(content);
    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(source);
        return pdf.getNumberOfPages();
    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                // a failed close does not change the page count already obtained
                e.printStackTrace();
            }
        }
        IOUtils.closeQuietly(source);
    }

}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Builds a new document made of the pages {@code begin} to {@code end} (1-based, inclusive) of
 * this document.
 *
 * @param begin index of the first page to extract; the code reads page {@code begin - 1} of the
 *     underlying list
 * @param end index of the last page to extract
 * @return a new {@code PDFDocument} holding the extracted pages, or an empty one when this
 *     document has no content
 * @throws DocumentException if the page range is rejected by {@code assertValidPageRange}, or
 *     wrapping any failure raised while reading, copying or saving the pages
 */
public Document extract(int begin, int end) throws DocumentException {

    this.assertValidPageRange(begin, end);

    PDFDocument result = new PDFDocument();

    if (content == null) {
        return result;
    }

    ByteArrayInputStream bais = null;
    ByteArrayOutputStream baos = null;
    PDDocument document = new PDDocument();
    PDDocument inputPDF = null;

    try {

        bais = new ByteArrayInputStream(content);
        baos = new ByteArrayOutputStream();
        inputPDF = PDDocument.load(bais);

        // fetch the page list once instead of once per copied page
        java.util.List<?> pages = inputPDF.getDocumentCatalog().getAllPages();
        for (int pageNumber = begin; pageNumber <= end; pageNumber++) {
            document.addPage((PDPage) pages.get(pageNumber - 1));
        }

        document.save(baos);
        result.load(new ByteArrayInputStream(baos.toByteArray()));

    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        // Both PDF handles are released on every path. The source document is closed only
        // here, after the save, because the copied pages still belong to it until then.
        for (PDDocument pdf : new PDDocument[] { document, inputPDF }) {
            if (pdf != null) {
                try {
                    pdf.close();
                } catch (IOException ignored) {
                    // the extracted bytes are already complete (or the failure is already
                    // being reported); a close failure adds nothing actionable
                }
            }
        }
        IOUtils.closeQuietly(bais);
        IOUtils.closeQuietly(baos);
    }

    return result;
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Appends the pages of the given document after the pages of this one and replaces
 * {@code content} with the merged PDF.
 *
 * @param document the document whose content is appended; it is first passed to the superclass
 *     {@code append}
 * @throws DocumentException if the superclass {@code append} fails, or wrapping any failure
 *     raised while loading, merging or saving the PDFs
 */
@Override
public void append(Document document) throws DocumentException {

    super.append(document);

    ByteArrayOutputStream baos = null;
    PDDocument mergedDocument = new PDDocument();
    PDDocument pDocument = null;
    PDDocument pNewDocument = null;

    try {

        baos = new ByteArrayOutputStream();

        // copy the pages of the current content
        pDocument = PDDocument.load(new ByteArrayInputStream(content));
        java.util.List<?> currentPages = pDocument.getDocumentCatalog().getAllPages();
        int pageCount = pDocument.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            mergedDocument.addPage((PDPage) currentPages.get(i));
        }

        // copy new document
        pNewDocument = PDDocument.load(new ByteArrayInputStream(document.getContent()));
        java.util.List<?> newPages = pNewDocument.getDocumentCatalog().getAllPages();
        pageCount = pNewDocument.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            mergedDocument.addPage((PDPage) newPages.get(i));
        }

        mergedDocument.save(baos);
        // replace content with new content
        content = baos.toByteArray();

    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        // All three PDF handles are released on every path. The sources are closed only here,
        // after the save, because the merged document still references their pages until then.
        for (PDDocument pdf : new PDDocument[] { mergedDocument, pDocument, pNewDocument }) {
            if (pdf != null) {
                try {
                    pdf.close();
                } catch (IOException ignored) {
                    // the merged bytes are already captured (or the failure is already being
                    // reported); a close failure adds nothing actionable
                }
            }
        }
        IOUtils.closeQuietly(baos);
    }

}

From source file:org.github.jipsg.pdfbox.PDDocumentFactory.java

License:Apache License

/**
 * Create a PDFBox document.
 *
 * <p>Supported sources are {@code File}, {@code InputStream}, {@code DataSource},
 * {@code byte[]} and {@code String} (interpreted as a file path). Any input stream received or
 * opened here is closed before the method returns. An encrypted document is decrypted with an
 * empty password.
 *
 * @param source An opaque source
 * @return the document
 * @throws IOException the creation failed; this includes a {@code null} or unsupported source,
 *     reported with an {@code IllegalArgumentException} as cause
 */
public PDDocument create(Object source) throws IOException {

    PDDocument result = null;
    InputStream is = null;
    String sourceName = "unknown";
    // Resolved up front so a null source ends up as the declared IOException instead of a
    // NullPointerException thrown while building the error message.
    final String sourceType = (source == null) ? "null" : source.getClass().getName();

    try {
        if (source instanceof File) {
            File sourceFile = (File) source;
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile);
        } else if (source instanceof InputStream) {
            is = (InputStream) source;
            result = PDDocument.load(is, true);
        } else if (source instanceof DataSource) {
            is = ((DataSource) source).getInputStream();
            result = PDDocument.load(is, true);
        } else if (source instanceof byte[]) {
            is = new ByteArrayInputStream((byte[]) source);
            result = PDDocument.load(is, true);
        } else if (source instanceof String) {
            File sourceFile = new File((String) source);
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile.getAbsoluteFile());
        } else {
            throw new IllegalArgumentException("Don't know how to handle : " + sourceType);
        }

        if (result.isEncrypted()) {
            result.decrypt("");
        }

        return result;
    } catch (Exception e) {
        // A document that was loaded but could not be decrypted would otherwise stay open
        // with nobody holding a reference to it.
        if (result != null) {
            try {
                result.close();
            } catch (IOException ignored) {
                // the original failure below is the one worth reporting
            }
        }
        String msg = "Parsing the PDF document failed : name=" + sourceName + ", type=" + sourceType;
        throw new IOException(msg, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
}

From source file:org.grouplens.samantha.modeler.dao.PdfFileDAO.java

License:Open Source License

private PdfFileDAO(String filePath) {
    try {/*from   w  ww.  ja va  2 s .c  o m*/
        stripper = new PDFTextStripper();
        pdfDoc = PDDocument.load(new File(filePath));
    } catch (IOException e) {
        throw new BadRequestException(e);
    }
    numPages = pdfDoc.getNumberOfPages();
}

From source file:org.haplo.component.pdfbox.ConvertPDFToText.java

License:Mozilla Public License

/**
 * Extracts the text of the PDF at {@code inputPathname} and writes it, UTF-8 encoded, to the
 * file at {@code outputPathname}.
 *
 * @throws Exception if loading the PDF, creating the stripper or writing the output fails
 */
protected void performOperation() throws Exception {
    final File inputFile = new File(this.inputPathname);
    try (PDDocument pdf = PDDocument.load(inputFile)) {
        // the stripper is created before the output file is opened, so a failure here leaves
        // no output file behind
        PDFTextStripper stripper = new PDFTextStripper();
        try (FileOutputStream out = new FileOutputStream(new File(this.outputPathname));
                OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
            stripper.writeText(pdf, writer);
        }
    }
}

From source file:org.haplo.component.pdfbox.PDF.java

License:Mozilla Public License

/**
 * Opens a PDF and reads its page count and the crop box size of its first page. The instance
 * is marked valid only when all of that succeeds; on any failure the exception is discarded,
 * {@code close()} is called and the instance stays invalid. {@code close()} must be called by
 * the user to clean up nicely.
 *
 * @param filename path of the PDF file to open
 * @throws IOException declared by the constructor signature
 * @throws RuntimeException if the current thread is not marked as a worker
 */
public PDF(String filename) throws IOException {
    if (!Operation.isThreadMarkedAsWorker()) {
        throw new RuntimeException("PDF manipulation can only be performed in a worker process");
    }

    // Invalid until everything below has been read successfully
    this.isValid = false;

    try {
        // Open the PDF for reading
        this.pdf = PDDocument.load(new File(filename));
        this.numberOfPages = this.pdf.getNumberOfPages();

        // Width and height come from the crop box of the first page, truncated to int
        PDRectangle firstPageCropBox = this.pdf.getPage(0).getCropBox();
        this.width = (int) firstPageCropBox.getWidth();
        this.height = (int) firstPageCropBox.getHeight();

        this.isValid = true;
    } catch (Exception e) {
        // Ignore exception, but do clean up nicely
        close();
    }
}

From source file:org.haplo.component.pdfbox.TextExtractPDF.java

License:Mozilla Public License

/**
 * Extracts the text of the PDF found at {@code getInputPathname()}.
 *
 * @return the text written by the PDFBox text stripper
 * @throws IOException if the PDF cannot be loaded, stripped or closed
 */
protected String extract() throws IOException {
    final File inputFile = new File(getInputPathname());
    try (PDDocument pdf = PDDocument.load(inputFile)) {
        StringWriter buffer = new StringWriter();
        new PDFTextStripper().writeText(pdf, buffer);
        return buffer.toString();
    }
}