Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.geoserver.wms.map.PDFGetMapTest.java

License:Open Source License

/**
 * Runs the first page of the given PDF through a graphics stream engine whose drawing callbacks
 * all do nothing, and returns the last tiling pattern that was set as the non-stroking color
 * while the page was processed. Meant to be used against a document that only has a single
 * tiling pattern.
 *
 * <p>The document loaded here is not closed by this method.
 *
 * @param pdfDocument the PDF document as raw bytes
 * @return the last tiling pattern seen, or {@code null} when none was encountered
 * @throws InvalidPasswordException propagated from loading the document
 * @throws IOException if loading the document or processing the page fails
 */
PDTilingPattern getTilingPattern(byte[] pdfDocument) throws InvalidPasswordException, IOException {
    // PDFBOX is used for parsing: iText works for text and image extraction, but attempts to
    // get tiling patterns out of it gave no results
    PDDocument document = PDDocument.load(pdfDocument);
    PDPage firstPage = document.getPage(0);

    // holder written from inside the anonymous operator below
    final AtomicReference<PDTilingPattern> lastPattern = new AtomicReference<>();

    // a graphics stream engine parses the PDF deeply enough to expose the tiling pattern in
    // parsed form; none of the drawing callbacks need to do anything
    PDFStreamEngine engine = new PDFGraphicsStreamEngine(firstPage) {

        @Override
        public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
            // nothing to draw
        }

        @Override
        public void clip(int windingRule) throws IOException {
            // nothing to draw
        }

        @Override
        public void closePath() throws IOException {
            // nothing to draw
        }

        @Override
        public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
            // nothing to draw
        }

        @Override
        public void drawImage(PDImage pdImage) throws IOException {
            // nothing to draw
        }

        @Override
        public void endPath() throws IOException {
            // nothing to draw
        }

        @Override
        public void fillAndStrokePath(int windingRule) throws IOException {
            // nothing to draw
        }

        @Override
        public void fillPath(int windingRule) throws IOException {
            // nothing to draw
        }

        @Override
        public Point2D getCurrentPoint() throws IOException {
            return null;
        }

        @Override
        public void lineTo(float x, float y) throws IOException {
            // nothing to draw
        }

        @Override
        public void moveTo(float x, float y) throws IOException {
            // nothing to draw
        }

        @Override
        public void shadingFill(COSName shadingName) throws IOException {
            // nothing to draw
        }

        @Override
        public void strokePath() throws IOException {
            // nothing to draw
        }
    };

    // trap: every time the non-stroking color is set, remember it if it is a tiling pattern
    engine.addOperator(new SetNonStrokingColorN() {

        @Override
        public void process(Operator operator, List<COSBase> arguments) throws IOException {
            super.process(operator, arguments);

            if (!(context.getGraphicsState().getNonStrokingColorSpace() instanceof PDPattern)) {
                return;
            }

            PDPattern patternSpace = (PDPattern) context.getGraphicsState().getNonStrokingColorSpace();
            PDColor fillColor = context.getGraphicsState().getNonStrokingColor();
            PDAbstractPattern candidate = patternSpace.getPattern(fillColor);
            if (candidate instanceof PDTilingPattern) {
                lastPattern.set((PDTilingPattern) candidate);
            }
        }
    });

    // run the engine over the page so the trap gets a chance to fire
    engine.processPage(firstPage);

    return lastPattern.get();
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Delegates to {@code super.load} and then verifies that the {@code content} field can be
 * parsed as a PDF. The parsed document is only used for this check and is closed again.
 *
 * @param inputStream stream handed to {@code super.load}
 * @throws IOException if {@code super.load} fails, if the content cannot be parsed as a PDF
 *         (the parser failure is attached as the cause), or if closing the parsed document fails
 */
@Override
public void load(InputStream inputStream) throws IOException {
    super.load(inputStream);

    // check that the file is a PDF
    ByteArrayInputStream bais = null;
    PDDocument document = null;

    try {

        bais = new ByteArrayInputStream(content);
        document = PDDocument.load(bais);

    } catch (Exception e) {
        // keep the parser failure as the cause so the reason for rejection is not lost
        throw new IOException("PDF document is not valid", e);
    } finally {
        if (document != null) {
            document.close();
        }
        IOUtils.closeQuietly(bais);
    }
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Counts the pages of the PDF held in {@code content}.
 *
 * @return the number of pages, or 0 when {@code content} is {@code null}
 * @throws DocumentException wrapping any failure while parsing the content
 */
public int getPageCount() throws DocumentException {

    // nothing loaded: report an empty document
    if (content == null) {
        return 0;
    }

    ByteArrayInputStream contentStream = new ByteArrayInputStream(content);
    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(contentStream);
        return pdf.getNumberOfPages();
    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        if (pdf != null) {
            // a failure to close is reported on stderr and does not affect the result
            try {
                pdf.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        IOUtils.closeQuietly(contentStream);
    }
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Builds a new PDF document made of pages {@code begin} to {@code end} (1-based, inclusive) of
 * this document.
 *
 * @param begin index of the first page to copy
 * @param end index of the last page to copy
 * @return the extracted document; an empty {@code PDFDocument} when {@code content} is
 *         {@code null}
 * @throws DocumentException wrapping any failure while parsing, copying, saving or closing
 */
public Document extract(int begin, int end) throws DocumentException {

    this.assertValidPageRange(begin, end);

    PDFDocument result = new PDFDocument();

    if (content == null) {
        return result;
    }

    ByteArrayInputStream bais = null;
    ByteArrayOutputStream baos = null;

    try {

        bais = new ByteArrayInputStream(content);
        baos = new ByteArrayOutputStream();

        PDDocument document = new PDDocument();
        try {
            PDDocument inputPDF = PDDocument.load(bais);
            try {
                for (int pageNumber = begin; pageNumber <= end; pageNumber++) {
                    document.addPage(
                            (PDPage) inputPDF.getDocumentCatalog().getAllPages().get(pageNumber - 1));
                }
                // the copied pages still belong to inputPDF, so save before it is closed
                document.save(baos);
            } finally {
                // previously leaked: the source document was never closed
                inputPDF.close();
            }
        } finally {
            // close on the failure path too, not only after a successful save
            document.close();
        }

        result.load(new ByteArrayInputStream(baos.toByteArray()));

    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        IOUtils.closeQuietly(bais);
        IOUtils.closeQuietly(baos);
    }

    return result;
}

From source file:org.ghost4j.document.PDFDocument.java

License:LGPL

/**
 * Appends the pages of the given document after the pages of this one and replaces
 * {@code content} with the merged PDF.
 *
 * @param document the document whose content is appended
 * @throws DocumentException if {@code super.append} rejects the document, or wrapping any
 *         failure while parsing, merging, saving or closing
 */
@Override
public void append(Document document) throws DocumentException {

    super.append(document);

    ByteArrayOutputStream baos = null;

    try {

        baos = new ByteArrayOutputStream();

        PDDocument mergedDocument = new PDDocument();
        try {
            PDDocument pDocument = PDDocument.load(new ByteArrayInputStream(content));
            try {
                int pageCount = pDocument.getNumberOfPages();
                for (int i = 0; i < pageCount; i++) {
                    mergedDocument.addPage((PDPage) pDocument.getDocumentCatalog().getAllPages().get(i));
                }

                // copy new document
                PDDocument pNewDocument = PDDocument.load(new ByteArrayInputStream(document.getContent()));
                try {
                    pageCount = pNewDocument.getNumberOfPages();
                    for (int i = 0; i < pageCount; i++) {
                        mergedDocument
                                .addPage((PDPage) pNewDocument.getDocumentCatalog().getAllPages().get(i));
                    }
                    // the merged pages still belong to their source documents, so save
                    // before either source is closed
                    mergedDocument.save(baos);
                } finally {
                    // previously leaked: never closed
                    pNewDocument.close();
                }
            } finally {
                // previously leaked: never closed
                pDocument.close();
            }
        } finally {
            // close on the failure path too, not only after a successful save
            mergedDocument.close();
        }

        // replace content with new content
        content = baos.toByteArray();

    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        IOUtils.closeQuietly(baos);
    }

}

From source file:org.github.jipsg.pdfbox.PDDocumentFactory.java

License:Apache License

/**
 * Create a PDFBox document.
 *
 * <p>Accepted sources are a {@code File}, an {@code InputStream}, a {@code DataSource}, a
 * {@code byte[]} or a {@code String} holding a file path. An encrypted document is decrypted
 * with the empty password. Any input stream used for loading, including one passed in by the
 * caller, is closed before this method returns.
 *
 * @param source An opaque source
 * @return the document
 * @throws IOException the creation failed; this includes a {@code null} or unsupported source
 */
public PDDocument create(Object source) throws IOException {

    PDDocument result = null;
    InputStream is = null;
    String sourceName = "unknown";
    // resolved up front so error reporting cannot itself fail on a null source
    String sourceType = (source == null) ? "null" : source.getClass().getName();

    try {
        if (source instanceof File) {
            File sourceFile = (File) source;
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile);
        } else if (source instanceof InputStream) {
            is = (InputStream) source;
            result = PDDocument.load(is, true);
        } else if (source instanceof DataSource) {
            is = ((DataSource) source).getInputStream();
            result = PDDocument.load(is, true);
        } else if (source instanceof byte[]) {
            is = new ByteArrayInputStream((byte[]) source);
            result = PDDocument.load(is, true);
        } else if (source instanceof String) {
            File sourceFile = new File((String) source);
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile.getAbsoluteFile());
        } else {
            throw new IllegalArgumentException("Don't know how to handle : " + sourceType);
        }

        if (result.isEncrypted()) {
            result.decrypt("");
        }

        return result;
    } catch (Exception e) {
        // a document that was loaded but could not be handed out must not leak
        if (result != null) {
            try {
                result.close();
            } catch (IOException ignored) {
                // the original failure is the one worth reporting
            }
        }
        String msg = "Parsing the PDF document failed : name=" + sourceName + ", type=" + sourceType;
        throw new IOException(msg, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
}

From source file:org.grouplens.samantha.modeler.dao.PdfFileDAO.java

License:Open Source License

private PdfFileDAO(String filePath) {
    try {/*from   w  ww.  ja va  2 s .c  o m*/
        stripper = new PDFTextStripper();
        pdfDoc = PDDocument.load(new File(filePath));
    } catch (IOException e) {
        throw new BadRequestException(e);
    }
    numPages = pdfDoc.getNumberOfPages();
}

From source file:org.haplo.component.pdfbox.ConvertPDFToText.java

License:Mozilla Public License

/**
 * Extracts the text of the PDF at {@code inputPathname} and writes it, UTF-8 encoded, to the
 * file at {@code outputPathname}.
 *
 * @throws Exception if loading the PDF, opening the output file or writing the text fails
 */
protected void performOperation() throws Exception {
    File input = new File(this.inputPathname);
    try (PDDocument pdf = PDDocument.load(input)) {
        PDFTextStripper stripper = new PDFTextStripper();
        // the output file is only opened once the PDF has loaded and the stripper exists
        try (FileOutputStream out = new FileOutputStream(new File(this.outputPathname));
                OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
            stripper.writeText(pdf, writer);
        }
    }
}

From source file:org.haplo.component.pdfbox.PDF.java

License:Mozilla Public License

/**
 * Open a PDF and read its data: the number of pages and the crop box size of the first page.
 * close() must be called to clean up nicely.
 *
 * If reading fails for any reason the exception is discarded, close() is called and the
 * instance is left flagged as not valid.
 */
public PDF(String filename) throws IOException {
    if (!Operation.isThreadMarkedAsWorker()) {
        throw new RuntimeException("PDF manipulation can only be performed in a worker process");
    }

    // Stays false unless every step below succeeds
    isValid = false;

    try {
        this.pdf = PDDocument.load(new File(filename));
        this.numberOfPages = this.pdf.getNumberOfPages();

        // Dimensions come from the first page's crop box, truncated to ints
        PDRectangle firstPageCropBox = this.pdf.getPage(0).getCropBox();
        width = (int) firstPageCropBox.getWidth();
        height = (int) firstPageCropBox.getHeight();

        isValid = true;
    } catch (Exception ignored) {
        // Deliberately not rethrown; just release whatever was opened
        close();
    }
}

From source file:org.haplo.component.pdfbox.TextExtractPDF.java

License:Mozilla Public License

/**
 * Extracts the text of the PDF found at {@code getInputPathname()}.
 *
 * @return the text written by the text stripper
 * @throws IOException if the PDF cannot be loaded, stripped or closed
 */
protected String extract() throws IOException {
    File input = new File(getInputPathname());
    try (PDDocument pdf = PDDocument.load(input)) {
        PDFTextStripper stripper = new PDFTextStripper();
        StringWriter buffer = new StringWriter();
        stripper.writeText(pdf, buffer);
        return buffer.toString();
    }
}