List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:org.olat.search.service.document.file.PdfDocument.java
License:Apache License
private String extractTextFromPdf(final VFSLeaf leaf) throws IOException, DocumentAccessException { if (log.isDebug()) { log.debug("readContent from pdf starts..."); }//from w w w .j a v a 2s . co m PDDocument document = null; BufferedInputStream bis = null; try { bis = new BufferedInputStream(leaf.getInputStream()); document = PDDocument.load(bis); if (document.isEncrypted()) { try { document.decrypt(""); } catch (final Exception e) { throw new DocumentAccessException( "PDF is encrypted. Can not read content file=" + leaf.getName()); } } if (log.isDebug()) { log.debug("readContent PDDocument loaded"); } final PDFTextStripper stripper = new PDFTextStripper(); return stripper.getText(document); } finally { if (document != null) { document.close(); } if (bis != null) { bis.close(); } } }
From source file:org.omegat.filters2.pdf.PdfFilter.java
License:Open Source License
@Override public BufferedReader createReader(File infile, String encoding) throws IOException { PDFTextStripper stripper;//from w w w .j a va2 s . com stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.setSortByPosition(true); PDDocument document = PDDocument.load(infile.getAbsolutePath()); String text = stripper.getText(document); document.close(); return new BufferedReader(new StringReader(text)); }
From source file:org.opencps.util.ExtractTextLocations.java
License:Open Source License
public ExtractTextLocations(String fullPath) throws IOException { PDDocument document = null;/*from w ww .j a v a 2 s . c om*/ try { File input = new File(fullPath); document = PDDocument.load(input); if (document.isEncrypted()) { try { document.decrypt(StringPool.BLANK); } catch (Exception e) { _log.error(e); } } // ExtractTextLocations printer = new ExtractTextLocations(); List allPages = document.getDocumentCatalog().getAllPages(); if (allPages != null && allPages.size() > 0) { PDPage page = (PDPage) allPages.get(0); PDStream contents = page.getContents(); if (contents != null) { this.processStream(page, page.findResources(), page.getContents().getStream()); } PDRectangle pageSize = page.findMediaBox(); if (pageSize != null) { setPageWidth(pageSize.getWidth()); setPageHeight(pageSize.getHeight()); setPageLLX(pageSize.getLowerLeftX()); setPageURX(pageSize.getUpperRightX()); setPageLLY(pageSize.getLowerLeftY()); setPageURY(pageSize.getUpperRightY()); } } } catch (Exception e) { _log.error(e); } finally { if (document != null) { document.close(); } } }
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
@Override public List<String> split(final String inputUri, final String outputUri, final List<Integer> pages) throws IOException, COSVisitorException { final List<String> result = new ArrayList<String>(); if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && CollectionUtils.isNotEmpty(pages)) { final PDDocument doc = PDDocument.load(inputUri); final List<PDDocument> splittedDocs = new ArrayList<PDDocument>(); @SuppressWarnings("unchecked") final List<PDPage> pagesList = doc.getDocumentCatalog().getAllPages(); // This section creates a new document for each split // indicated into the list, except the last one. Integer currentPage = 0;/* w ww. j a v a2 s . com*/ for (final Integer page : pages) { final PDDocument document = new PDDocument(); for (Integer i = currentPage; i <= page - 2; i++) { document.addPage(pagesList.get(i)); } splittedDocs.add(document); currentPage = page - 1; document.close(); } // This section splits the last document final PDDocument lastDocument = new PDDocument(); for (Integer i = currentPage; i < pagesList.size(); i++) { lastDocument.addPage(pagesList.get(i)); } splittedDocs.add(lastDocument); lastDocument.close(); Integer subIndex = 1; for (final PDDocument document : splittedDocs) { final String extension = this.converterUtils.addSubIndexBeforeExtension(outputUri, subIndex++); document.save(extension); result.add(extension); } doc.close(); } else { throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE); } return result; }
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
@Override public List<String> split(final String inputUri, final String outputUri, final Integer pages) throws IOException, COSVisitorException { final List<String> result = new ArrayList<String>(); if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && pages != null) { final PDDocument doc = PDDocument.load(inputUri); final Splitter splitter = new Splitter(); splitter.setSplitAtPage(pages);//from w w w . j av a 2s . co m final List<PDDocument> splittedDocs = splitter.split(doc); Integer subIndex = 1; for (final PDDocument document : splittedDocs) { final String extension = this.converterUtils.addSubIndexBeforeExtension(outputUri, subIndex++); document.save(extension); result.add(extension); document.close(); } doc.close(); } else { throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE); } return result; }
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
/**
 * Protects the PDF at {@code inputUri} with a password and writes the result
 * to {@code outputUri}.
 *
 * <p>The same password is used as owner and user password, together with a
 * default-constructed {@link AccessPermission}.
 *
 * @param inputUri location of the PDF to protect; must not be blank
 * @param outputUri location the protected PDF is saved to; must not be blank
 * @param password password to apply; must not be blank
 * @throws IOException if loading, saving or closing the document fails
 * @throws BadSecurityHandlerException if the protection policy cannot be applied
 * @throws COSVisitorException if saving the document fails
 * @throws IllegalArgumentException if any argument is blank
 */
@Override
public void protect(final String inputUri, final String outputUri, final String password)
        throws IOException, BadSecurityHandlerException, COSVisitorException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && StringUtils.isNotBlank(password)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final StandardProtectionPolicy pp = new StandardProtectionPolicy(password, password,
                    new AccessPermission());
            doc.protect(pp);
            doc.save(outputUri);
        } finally {
            // Close the document even when protecting or saving fails.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
/**
 * Opens the password-protected PDF at {@code inputUri} with the given
 * password, replaces its protection with a policy that has no owner or user
 * password, and writes the result to {@code outputUri}.
 *
 * @param inputUri location of the protected PDF; must not be blank
 * @param outputUri location the resulting PDF is saved to; must not be blank
 * @param password password used to open the document; must not be blank
 * @throws IOException if loading, saving or closing the document fails
 * @throws COSVisitorException if saving the document fails
 * @throws BadSecurityHandlerException if the protection cannot be opened or applied
 * @throws CryptographyException if decryption fails
 * @throws IllegalArgumentException if any argument is blank
 */
@Override
public void unProtect(final String inputUri, final String outputUri, final String password)
        throws IOException, COSVisitorException, BadSecurityHandlerException, CryptographyException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && StringUtils.isNotBlank(password)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final DecryptionMaterial decryptionMaterial = new StandardDecryptionMaterial(password);
            doc.openProtection(decryptionMaterial);

            final StandardProtectionPolicy pp = new StandardProtectionPolicy(null, null,
                    new AccessPermission());
            doc.protect(pp);
            doc.save(outputUri);
        } finally {
            // Close the document even when a wrong password or a failed save aborts the call.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
@Override public void putWatermark(final String inputUri, final String outputUri, final String text, final Color color, final Float alpha, final WatermarkPosition watermarkPosition, final List<Integer> pages) throws IOException, COSVisitorException, WatermarkOutOfLengthException { if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && StringUtils.isNotBlank(text) && color != null && alpha != null && watermarkPosition != null) { // If watermark position is not centered, then max length is the // same for landscape and portrait, text length is tested here so // there is no need to continue. if (!WatermarkPosition.CENTER.equals(watermarkPosition) && text.length() > watermarkPosition.getMaxLengthPortrait()) { throw new WatermarkOutOfLengthException(Constants.WATERMARK_OUT_OF_LENGTH_EXCEPTION_MESSAGE); }/*ww w . j ava 2 s . c o m*/ final PDDocument doc = PDDocument.load(inputUri); final List<?> allPages = doc.getDocumentCatalog().getAllPages(); this.converterUtils.deleteNonSelectedPositions(allPages, pages); if (CollectionUtils.isNotEmpty(allPages)) { for (final Object object : allPages) { final PDPage page = (PDPage) object; // The transparency, opacity of graphic objects can be set // directly // on the drawing commands but need to be set to a graphic // state // which will become part of the resources. Graphic state is // set // up. this.watermarkUtils.setUpGraphicState(page, alpha); // Now we will be able to call the state definition before // doing // the // drawing try { this.watermarkUtils.addWatermark(doc, page, color, text, watermarkPosition); } catch (final WatermarkOutOfLengthException e) { doc.close(); throw e; } } } doc.save(outputUri); doc.close(); } else { throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE); } }
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
@Override public void addBookmarks(final String inputUri, final String outputUri, final String title, final List<PDFGalBookmark> pdfGalBookmarksList) throws IOException, COSVisitorException { if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && StringUtils.isNotEmpty(title) && CollectionUtils.isNotEmpty(pdfGalBookmarksList)) { final PDDocument doc = PDDocument.load(inputUri); final PDDocumentOutline outline = new PDDocumentOutline(); doc.getDocumentCatalog().setDocumentOutline(outline); final PDOutlineItem pagesOutline = new PDOutlineItem(); pagesOutline.setTitle(title);// w w w. j a v a2 s.com @SuppressWarnings("unchecked") final List<PDPage> pages = doc.getDocumentCatalog().getAllPages(); outline.appendChild(pagesOutline); for (final PDFGalBookmark pdfGalBookmark : pdfGalBookmarksList) { if (pdfGalBookmark != null && pdfGalBookmark.isInitializated()) { final PDPage page = pages.get(pdfGalBookmark.getPage() - 1); final PDPageFitWidthDestination dest = new PDPageFitWidthDestination(); dest.setPage(page); final PDOutlineItem bookmark = new PDOutlineItem(); bookmark.setDestination(dest); bookmark.setTitle(pdfGalBookmark.getText()); pagesOutline.appendChild(bookmark); } } pagesOutline.openNode(); outline.openNode(); doc.save(outputUri); doc.close(); } else { throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE); } }
From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java
License:Open Source License
@Override public void reIndexPageNumbers(final String inputUri, final String outputUri, final List<PDFGalPageNumbering> pdfGalPageNumberingList) throws IOException, COSVisitorException { if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && CollectionUtils.isNotEmpty(pdfGalPageNumberingList)) { final PDDocument doc = PDDocument.load(inputUri); final PDPageLabels pdPageLabels = new PDPageLabels(doc); for (final PDFGalPageNumbering pageNumbering : pdfGalPageNumberingList) { if (pageNumbering.isInitializated()) { final PDPageLabelRange pdPageLabelRange = new PDPageLabelRange(); pdPageLabelRange.setStyle(pageNumbering.getNumberingStyle().getValue()); pdPageLabels.setLabelItem(pageNumbering.getPageNumber() - 1, pdPageLabelRange); }// w w w . j av a 2 s . co m } doc.getDocumentCatalog().setPageLabels(pdPageLabels); doc.save(outputUri); doc.close(); } else { throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE); } }