List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:org.geoserver.wms.map.PDFGetMapTest.java
License:Open Source License
/** * Returns the last tiling pattern found during a render of the PDF document. Can be used to extract * one tiling pattern that gets actually used to render shapes (meant to be used against a document * that only has a single tiling pattern) * // ww w . j av a2 s .c o m * @param pdfDocument * @return * @throws InvalidPasswordException * @throws IOException */ PDTilingPattern getTilingPattern(byte[] pdfDocument) throws InvalidPasswordException, IOException { // load the document using PDFBOX (iText is no good for parsing tiling patterns, mostly works // well for text and image extraction, spent a few hours trying to use it with no results) PDDocument doc = PDDocument.load(pdfDocument); PDPage page = doc.getPage(0); // use a graphics stream engine, it's the only thing I could find that parses the PDF // deep enough to allow catching the tiling pattern in parsed form AtomicReference<PDTilingPattern> pattern = new AtomicReference<>(); PDFStreamEngine engine = new PDFGraphicsStreamEngine(page) { @Override public void strokePath() throws IOException { } @Override public void shadingFill(COSName shadingName) throws IOException { } @Override public void moveTo(float x, float y) throws IOException { } @Override public void lineTo(float x, float y) throws IOException { } @Override public Point2D getCurrentPoint() throws IOException { return null; } @Override public void fillPath(int windingRule) throws IOException { } @Override public void fillAndStrokePath(int windingRule) throws IOException { } @Override public void endPath() throws IOException { } @Override public void drawImage(PDImage pdImage) throws IOException { } @Override public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException { } @Override public void closePath() throws IOException { } @Override public void clip(int windingRule) throws IOException { } @Override public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException { } }; // setup 
the tiling pattern trap engine.addOperator(new SetNonStrokingColorN() { @Override public void process(Operator operator, List<COSBase> arguments) throws IOException { super.process(operator, arguments); PDColor color = context.getGraphicsState().getNonStrokingColor(); if (context.getGraphicsState().getNonStrokingColorSpace() instanceof PDPattern) { PDPattern colorSpace = (PDPattern) context.getGraphicsState().getNonStrokingColorSpace(); PDAbstractPattern ap = colorSpace.getPattern(color); if (ap instanceof PDTilingPattern) { pattern.set((PDTilingPattern) ap); } } } }); // run it engine.processPage(page); return pattern.get(); }
From source file:org.ghost4j.document.PDFDocument.java
License:LGPL
@Override public void load(InputStream inputStream) throws IOException { super.load(inputStream); // check that the file is a PDF ByteArrayInputStream bais = null; PDDocument document = null;/*from w w w. j av a2s . c o m*/ try { bais = new ByteArrayInputStream(content); document = PDDocument.load(bais); } catch (Exception e) { throw new IOException("PDF document is not valid"); } finally { if (document != null) document.close(); IOUtils.closeQuietly(bais); } }
From source file:org.ghost4j.document.PDFDocument.java
License:LGPL
public int getPageCount() throws DocumentException { int pageCount = 0; if (content == null) { return pageCount; }//from ww w. j a v a2 s . com ByteArrayInputStream bais = null; PDDocument document = null; try { bais = new ByteArrayInputStream(content); document = PDDocument.load(bais); pageCount = document.getNumberOfPages(); } catch (Exception e) { throw new DocumentException(e); } finally { if (document != null) try { document.close(); } catch (IOException e) { e.printStackTrace(); } IOUtils.closeQuietly(bais); } return pageCount; }
From source file:org.ghost4j.document.PDFDocument.java
License:LGPL
/**
 * Builds a new document made of the pages {@code begin} to {@code end} (1-based, inclusive)
 * of this document.
 *
 * @param begin index of the first page to extract
 * @param end index of the last page to extract
 * @return a new document holding the requested pages; an empty document when no content is
 *         loaded
 * @throws DocumentException if the page range is rejected or the PDF cannot be processed
 */
public Document extract(int begin, int end) throws DocumentException {
    this.assertValidPageRange(begin, end);

    PDFDocument result = new PDFDocument();
    if (content == null) {
        return result;
    }

    PDDocument document = new PDDocument();
    PDDocument inputPDF = null;
    try {
        inputPDF = PDDocument.load(new ByteArrayInputStream(content));
        for (int pageNumber = begin; pageNumber <= end; pageNumber++) {
            // page numbers are 1-based, the page list is 0-based
            document.addPage((PDPage) inputPDF.getDocumentCatalog().getAllPages().get(pageNumber - 1));
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        document.save(baos);
        result.load(new ByteArrayInputStream(baos.toByteArray()));
    } catch (Exception e) {
        throw new DocumentException(e);
    } finally {
        // Release both PDFBox documents on every path. The source document is closed last
        // because the pages added to the target document come from it.
        for (PDDocument toClose : new PDDocument[] { document, inputPDF }) {
            if (toClose != null) {
                try {
                    toClose.close();
                } catch (IOException ignored) {
                    // best effort: the extracted bytes, if any, are already in 'result'
                }
            }
        }
    }
    return result;
}
From source file:org.ghost4j.document.PDFDocument.java
License:LGPL
@Override public void append(Document document) throws DocumentException { super.append(document); ByteArrayOutputStream baos = null; PDDocument mergedDocument = new PDDocument(); try {// www . java2s . c om baos = new ByteArrayOutputStream(); ByteArrayInputStream bais = new ByteArrayInputStream(content); PDDocument pDocument = PDDocument.load(bais); int pageCount = pDocument.getNumberOfPages(); for (int i = 0; i < pageCount; i++) { mergedDocument.addPage((PDPage) pDocument.getDocumentCatalog().getAllPages().get(i)); } // copy new document ByteArrayInputStream baisNewDoc = new ByteArrayInputStream(document.getContent()); PDDocument pNewDocument = PDDocument.load(baisNewDoc); pageCount = pNewDocument.getNumberOfPages(); for (int i = 0; i < pageCount; i++) { mergedDocument.addPage((PDPage) pNewDocument.getDocumentCatalog().getAllPages().get(i)); } mergedDocument.save(baos); mergedDocument.close(); // replace content with new content content = baos.toByteArray(); } catch (Exception e) { throw new DocumentException(e); } finally { IOUtils.closeQuietly(baos); } }
From source file:org.github.jipsg.pdfbox.PDDocumentFactory.java
License:Apache License
/**
 * Create a PDFBox document.
 *
 * <p>Supported sources are {@link File}, {@link InputStream}, {@code DataSource},
 * {@code byte[]} and {@link String} (interpreted as a file path). Any stream used for loading,
 * including a stream passed in by the caller, is closed before this method returns. Encrypted
 * documents are decrypted with an empty password.
 *
 * @param source An opaque source
 * @return the document
 * @throws IOException the creation failed; this includes a {@code null} or unsupported source
 */
public PDDocument create(Object source) throws IOException {
    String sourceName = "unknown";
    // resolved up front so that building the error message cannot fail on a null source
    String sourceType = (source == null) ? "null" : source.getClass().getName();
    InputStream is = null;
    PDDocument result = null;

    try {
        if (source instanceof File) {
            File sourceFile = (File) source;
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile);
        } else if (source instanceof InputStream) {
            is = (InputStream) source;
            result = PDDocument.load(is, true);
        } else if (source instanceof DataSource) {
            is = ((DataSource) source).getInputStream();
            result = PDDocument.load(is, true);
        } else if (source instanceof byte[]) {
            is = new ByteArrayInputStream((byte[]) source);
            result = PDDocument.load(is, true);
        } else if (source instanceof String) {
            File sourceFile = new File((String) source);
            sourceName = sourceFile.getName();
            result = PDDocument.load(sourceFile.getAbsoluteFile());
        } else {
            throw new IllegalArgumentException("Don't know how to handle : " + sourceType);
        }

        if (result.isEncrypted()) {
            result.decrypt("");
        }

        return result;
    } catch (Exception e) {
        // do not leak a document that was loaded but could not be handed out
        if (result != null) {
            try {
                result.close();
            } catch (IOException ignored) {
                // the original failure is the one worth reporting
            }
        }
        String msg = "Parsing the PDF document failed : name=" + sourceName + ", type=" + sourceType;
        throw new IOException(msg, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
}
From source file:org.grouplens.samantha.modeler.dao.PdfFileDAO.java
License:Open Source License
/**
 * Creates the text stripper, loads the PDF at the given path and records its page count.
 *
 * @param filePath path of the PDF file to load
 * @throws BadRequestException if creating the stripper or loading the file fails with an
 *         {@link IOException}
 */
private PdfFileDAO(String filePath) {
    File pdfFile = new File(filePath);
    try {
        stripper = new PDFTextStripper();
        pdfDoc = PDDocument.load(pdfFile);
    } catch (IOException ioFailure) {
        throw new BadRequestException(ioFailure);
    }
    numPages = pdfDoc.getNumberOfPages();
}
From source file:org.haplo.component.pdfbox.ConvertPDFToText.java
License:Mozilla Public License
/**
 * Extracts the text of the PDF at {@code inputPathname} and writes it, UTF-8 encoded, to the
 * file at {@code outputPathname}.
 *
 * @throws Exception if loading the PDF, extracting its text or writing the output fails
 */
protected void performOperation() throws Exception {
    File inputFile = new File(this.inputPathname);
    try (PDDocument document = PDDocument.load(inputFile)) {
        // the stripper is created before the output file is opened
        PDFTextStripper textStripper = new PDFTextStripper();
        try (FileOutputStream fileOut = new FileOutputStream(new File(this.outputPathname));
                OutputStreamWriter textOut = new OutputStreamWriter(fileOut, "UTF-8")) {
            textStripper.writeText(document, textOut);
        }
    }
}
From source file:org.haplo.component.pdfbox.PDF.java
License:Mozilla Public License
/** * Open a PDF and read it's data. close() must be called to clean up nicely. *///from w w w .j ava 2 s . c om public PDF(String filename) throws IOException { if (!Operation.isThreadMarkedAsWorker()) { throw new RuntimeException("PDF manipulation can only be performed in a worker process"); } // Not valid by default isValid = false; // Try to load the page try { // Open the PDF for reading this.pdf = PDDocument.load(new File(filename)); this.numberOfPages = this.pdf.getNumberOfPages(); PDPage page = this.pdf.getPage(0); // Width and height PDRectangle cropBox = page.getCropBox(); width = (int) cropBox.getWidth(); height = (int) cropBox.getHeight(); isValid = true; } catch (Exception e) { // Ignore exception, but do clean up nicely close(); } }
From source file:org.haplo.component.pdfbox.TextExtractPDF.java
License:Mozilla Public License
/**
 * Extracts the text of the PDF located at {@code getInputPathname()}.
 *
 * @return the text written by the stripper for the whole document
 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
 */
protected String extract() throws IOException {
    File inputFile = new File(getInputPathname());
    try (PDDocument document = PDDocument.load(inputFile)) {
        PDFTextStripper textStripper = new PDFTextStripper();
        StringWriter buffer = new StringWriter();
        textStripper.writeText(document, buffer);
        return buffer.toString();
    }
}