List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:de.micromata.genome.gwiki.plugin.pdftextextractor_1_0.PdfTextExtractor.java
License:Apache License
/**
 * Extracts the text content of a PDF supplied as a stream.
 *
 * @param fileName name associated with the data; not used by the extraction itself
 * @param data stream holding the PDF bytes
 * @return the text the stripper wrote for the document
 * @throws RuntimeIOException if loading, text extraction or closing fails with an IOException
 */
public String extractText(String fileName, InputStream data) {
    try {
        PDDocument doc = PDDocument.load(data);
        try {
            PDFTextStripper st = new PDFTextStripper("UTF-8");
            StringWriter sout = new StringWriter();
            st.writeText(doc, sout);
            return sout.toString();
        } finally {
            // Close in finally so a failure while stripping does not leak the document.
            doc.close();
        }
    } catch (IOException ex) {
        throw new RuntimeIOException(ex);
    }
}
From source file:de.mirkosertic.desktopsearch.pdfpreview.PDFPreviewGenerator.java
License:Open Source License
@Override public synchronized Preview createPreviewFor(File aFile) { PDDocument theDocument = null;/* w w w . j a va2s . c om*/ try { theDocument = PDDocument.load(aFile); List<?> thePages = theDocument.getDocumentCatalog().getAllPages(); if (thePages.isEmpty()) { return null; } PDPage theFirstPage = (PDPage) thePages.get(0); PDRectangle mBox = theFirstPage.findMediaBox(); float theWidthPt = mBox.getWidth(); float theHeightPt = mBox.getHeight(); int theWidthPx = THUMB_WIDTH; // Math.round(widthPt * scaling); int theHeightPx = THUMB_HEIGHT; // Math.round(heightPt * scaling); float theScaling = THUMB_WIDTH / theWidthPt; // resolution / 72.0F; Dimension thePageDimension = new Dimension((int) theWidthPt, (int) theHeightPt); BufferedImage theImage = new BufferedImage(theWidthPx, theHeightPx, BufferedImage.TYPE_INT_RGB); Graphics2D theGraphics = (Graphics2D) theImage.getGraphics(); theGraphics.setBackground(new Color(255, 255, 255, 0)); theGraphics.clearRect(0, 0, theImage.getWidth(), theImage.getHeight()); theGraphics.scale(theScaling, theScaling); PageDrawer theDrawer = new PageDrawer(); theDrawer.drawPage(theGraphics, theFirstPage, thePageDimension); int rotation = theFirstPage.findRotation(); if ((rotation == 90) || (rotation == 270)) { int w = theImage.getWidth(); int h = theImage.getHeight(); BufferedImage rotatedImg = new BufferedImage(w, h, theImage.getType()); Graphics2D g = rotatedImg.createGraphics(); g.rotate(Math.toRadians(rotation), w / 2, h / 2); g.drawImage(theImage, null, 0, 0); } theGraphics.dispose(); return new Preview(ImageUtils.rescale(theImage, THUMB_WIDTH, THUMB_HEIGHT, ImageUtils.RescaleMethod.RESIZE_FIT_ONE_DIMENSION)); } catch (Exception e) { LOGGER.error("Error creating preview for " + aFile, e); return null; } finally { try { // Always close the document theDocument.close(); } catch (Exception e) { } } }
From source file:de.offis.health.icardea.cied.pdf.extractor.PDFApachePDFBoxExtractor.java
License:Apache License
public boolean openDocument(String fullPDFFilePath) throws IOException, Exception { boolean returnCode = false; if (fullPDFFilePath == null) { throw new Exception("There is no full path to a file given."); } // end if/*from w ww . j av a 2s .c o m*/ File pdfFile = new File(fullPDFFilePath); if (pdfFile.isFile() && pdfFile.canRead()) { this.fullPDFFilePath = pdfFile.getAbsolutePath(); this.fullPDFDirectoryPath = pdfFile.getPath(); logger.debug("FilePath.....: " + this.fullPDFFilePath); logger.debug("DirectoryPath: " + this.fullPDFDirectoryPath); // Open the PDF file pdfDocument = PDDocument.load(pdfFile.getAbsolutePath()); logger.debug("PDF contains pages: " + pdfDocument.getNumberOfPages()); // Remove reference to the file object as it is no longer needed (cleanup) pdfFile = null; returnCode = true; } else { throw new Exception("The given PDF file is not a file or not readable (check permissions)."); } // end if..else return returnCode; }
From source file:de.oio.jpdfunit.document.pdflibimpl.PdfBoxAnalyser.java
License:Open Source License
/**
 * Creates an analyser for the PDF at the given path, then reads its content and fonts.
 *
 * @param file
 *            The path and the file as String. I.e. "/home/bbratkus/test.pdf".
 * @throws IllegalArgumentException
 *             if {@code file} is {@code null} or empty, or if the document cannot be loaded
 *             (the underlying IOException is attached as the cause)
 * @throws IOException
 *             declared for compatibility; load failures are reported as
 *             IllegalArgumentException
 */
public PdfBoxAnalyser(final String file) throws IOException {
    // Check for null first: calling a method on a null reference would raise a
    // NullPointerException before the argument check could reject it.
    if (file == null || file.isEmpty()) {
        throw new IllegalArgumentException(PARAMETER);
    }
    try {
        pdDocument = PDDocument.load(file);
    } catch (final IOException ioe) {
        // Keep the original failure as the cause so the reason is not lost.
        throw new IllegalArgumentException(NODOCINIT, ioe);
    }
    setContentAsStringBuffer();
    getDocumentFonts();
}
From source file:de.oio.jpdfunit.document.pdflibimpl.PdfBoxAnalyser.java
License:Open Source License
/**
 * Creates an analyser for the PDF read from the given stream, then reads its content and
 * fonts.
 *
 * @param pdfStream
 *            The stream which the pdf file is within.
 * @throws IllegalArgumentException
 *             if {@code pdfStream} is {@code null}, or if the document cannot be loaded
 *             (the underlying IOException is attached as the cause)
 * @throws IOException
 *             declared for compatibility; load failures are reported as
 *             IllegalArgumentException
 */
public PdfBoxAnalyser(final InputStream pdfStream) throws IOException {
    if (pdfStream == null) {
        throw new IllegalArgumentException(PARAMETER);
    }
    try {
        pdDocument = PDDocument.load(pdfStream);
    } catch (final IOException ioe) {
        // Keep the original failure as the cause so the reason is not lost.
        throw new IllegalArgumentException(NODOCINIT, ioe);
    }
    setContentAsStringBuffer();
    getDocumentFonts();
}
From source file:de.prozesskraft.pkraft.Createdoc.java
/** * merge the pdfs/*from w ww .j a va 2 s.c o m*/ */ private static void mergePdf(Map<String, String> pdfRankFiles, String output) { System.out.println("merging pdfs to a single file"); Set<String> keySet = pdfRankFiles.keySet(); ArrayList<String> listKey = new ArrayList(keySet); Collections.sort(listKey); try { PDDocument document = new PDDocument(); // if(document.getNumberOfPages() > 0) // { // System.out.println("deleting empty page"); // document.removePage(0); // } for (String actualKey : listKey) { PDDocument part = PDDocument.load(pdfRankFiles.get(actualKey)); System.out.println("merging " + pdfRankFiles.get(actualKey)); ArrayList<PDPage> list = (ArrayList<PDPage>) part.getDocumentCatalog().getAllPages(); for (PDPage page : list) { document.addPage(page); } } try { System.out.println("writing " + output); document.save(output); } catch (COSVisitorException e) { // TODO Auto-generated catch block e.printStackTrace(); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:de.redsix.pdfcompare.PdfComparator.java
License:Apache License
/**
 * Loads a PDF from the given stream and hands it, with a renderer for it, to
 * {@code addExtraPages}. The document is closed when the call completes.
 *
 * @param expectedPdfIS stream holding the PDF to load
 * @param markerColor marker color forwarded to {@code addExtraPages}
 * @throws IOException if the document cannot be loaded or processing fails
 */
private void addSingleDocumentToResult(InputStream expectedPdfIS, int markerColor) throws IOException {
    try (PDDocument document = PDDocument.load(expectedPdfIS)) {
        addExtraPages(document, new PDFRenderer(document), 0, markerColor, true);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.core.io.pdf.Pdf2CasConverter.java
License:Apache License
/**
 * Loads a PDF from the stream and writes its text, using the given CAS as the target.
 *
 * Resets the {@code cas} and {@code text} fields before delegating to
 * {@code writeText(PDDocument)}. The document is always closed afterwards.
 *
 * @param aCas the CAS stored in the {@code cas} field for this conversion
 * @param aIs stream holding the PDF to load
 * @throws IOException if the document cannot be loaded, is encrypted, or conversion fails
 */
public void writeText(final CAS aCas, final InputStream aIs) throws IOException {
    final PDDocument document = PDDocument.load(aIs);
    try {
        final boolean encrypted = document.isEncrypted();
        if (encrypted) {
            throw new IOException("Encrypted documents currently not supported");
        }

        cas = aCas;
        text = new StringBuilder();
        writeText(document);
    } finally {
        document.close();
    }
}
From source file:de.uni_koeln.ub.drc.reader.PdfContentExtractor.java
License:Open Source License
/**
 * Extracts the page content of a PDF file.
 *
 * Also stores {@code pdfName} in the static {@code location} field before loading.
 *
 * @param pdfName
 *            The full path to the PDF file to extract content from
 * @return The PageInfo object for the PDF, or {@code null} if an IOException occurred (the
 *         stack trace is printed)
 */
public static PageInfo extractContentFromPdf(String pdfName) {
    try {
        location = pdfName;
        PDDocument document = PDDocument.load(new File(pdfName));
        try {
            PdfContentExtractor x = initExtractor(document);
            return x.toPageInfo();
        } finally {
            // Close in finally so a failure during extraction does not leak the document.
            document.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java
License:Open Source License
@Override public void generateThumbnail(final File input, final File output) throws IOException, ThumbnailerException { FileUtils.deleteQuietly(output);//from w ww . j a v a2 s .c o m PDDocument document = null; try { try { document = PDDocument.load(input); } catch (final IOException e) { throw new ThumbnailerException("Could not load PDF File", e); } final List<?> pages = document.getDocumentCatalog().getAllPages(); final PDPage page = (PDPage) pages.get(0); final BufferedImage tmpImage = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB); if (tmpImage.getWidth() == this.thumbWidth) { ImageIO.write(tmpImage, PDFBoxThumbnailer.OUTPUT_FORMAT, output); } else { final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight); resizer.resizeMethod = ResizeImage.NO_RESIZE_ONLY_CROP; resizer.setInputImage(tmpImage); resizer.writeOutput(output); } } finally { if (document != null) { try { document.close(); } catch (final IOException e) { } } } }