List of usage examples for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages
public int getNumberOfPages()
From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java
@Override public List<String> extractTextFromDocumentPagewise(String pathToFile) { List<String> pageWiseText = new LinkedList<>(); COSDocument cosDoc = null;//from w ww.j a v a 2s. c o m PDDocument pdDoc = null; try { PDFParser parser = new PDFParser(new FileInputStream(new File(pathToFile))); parser.parse(); cosDoc = parser.getDocument(); pdDoc = new PDDocument(cosDoc); for (int pageId = 0; pageId < pdDoc.getNumberOfPages(); pageId++) { pageWiseText.add(extractTextFromPage(pdDoc, pageId)); } } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } finally { if (pdDoc != null) { try { pdDoc.close(); } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } } if (cosDoc != null) { try { cosDoc.close(); } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } } } return pageWiseText; }
From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java
/**
 * Extracts the text of a single page of the given document.
 *
 * @param pdDoc the document to read from
 * @param page  page index; {@code page + 1} is used as both the stripper's start and end page
 * @return the text returned by the stripper for that page
 * @throws RuntimeException if the stripper fails with an {@link IOException}; the original
 *                          exception is logged and attached as the cause
 */
private String extractTextFromPage(PDDocument pdDoc, int page) {
    try {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        int pageNumber = page + 1;
        pdfStripper.setStartPage(pageNumber);
        pdfStripper.setEndPage(pageNumber);
        return pdfStripper.getText(pdDoc);
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
        // Keep the cause so callers see the full stack trace, not just the message.
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
From source file:FileIOAux.PrintAux.java
/** * @see/*from ww w. j a v a 2s. c o m*/ * http://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images * @param fil * @return */ public static BufferedImage[] pdfToImage(String fil) { BufferedImage[] bim = null; try { PDDocument document = PDDocument.load(new File(fil)); if (document != null) { PDFRenderer pdfRenderer = new PDFRenderer(document); bim = new BufferedImage[document.getNumberOfPages()]; for (int i = 0; i < document.getNumberOfPages(); i++) { bim[i] = pdfRenderer.renderImage(i); } document.close(); } } catch (IOException ex) { Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex); } return bim; }
From source file:FileIOAux.PrintAux.java
/**
 * Counts the pages selected by the given page ranges, capped at the number of pages in
 * the PDF loaded from {@code file}.
 *
 * <p>Each range contributes one page, plus {@code range[1] - range[0]} when it has two
 * members.
 *
 * @see <a href="http://stackoverflow.com/questions/3701644/how-can-i-get-the-total-number-of-pages-to-be-printed">
 *      How can I get the total number of pages to be printed</a>
 * @param pageRanges the requested page ranges
 * @return the number of pages, or {@code 0} when {@code doc} is {@code null} or the PDF
 *         cannot be loaded (or closed) without an I/O error
 */
int getNumberOfPages(PageRanges pageRanges) {
    if (doc == null) {
        return 0;
    }
    final int documentPages;
    // The document is only needed for its page count; close it right away instead of
    // leaking it as the previous implementation did.
    try (PDDocument doco = PDDocument.load(file)) {
        documentPages = doco.getNumberOfPages();
    } catch (IOException ex) {
        Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex);
        return 0;
    }
    int pages = 0;
    for (int[] range : pageRanges.getMembers()) {
        pages += 1;
        if (range.length == 2) {
            pages += range[1] - range[0];
        }
    }
    return Math.min(pages, documentPages);
}
From source file:fr.acxio.tools.agia.file.pdf.PageSplittingPDDocumentFactory.java
License:Apache License
private PDDocumentContainer splitDocument(PDDocument sDocument, Integer sStartPage, Integer sEndPage, Integer sSplitAtPage) throws IOException { Splitter aSplitter = new Splitter(); int aNumberOfPages = sDocument.getNumberOfPages(); boolean aStartEndPageSet = false; if (sStartPage != null) { aSplitter.setStartPage(sStartPage); aStartEndPageSet = true;//from www .ja va2 s . c o m if (sSplitAtPage == null) { aSplitter.setSplitAtPage(aNumberOfPages); } } if (sEndPage != null) { aSplitter.setEndPage(sEndPage); aStartEndPageSet = true; if (sSplitAtPage == null) { aSplitter.setSplitAtPage(sEndPage); } } if (sSplitAtPage != null) { aSplitter.setSplitAtPage(sSplitAtPage); } else if (!aStartEndPageSet) { aSplitter.setSplitAtPage(1); } List<PDDocument> aParts = aSplitter.split(sDocument); return new BasicPDDocumentContainer(sDocument, aParts); }
From source file:GUI.Helper.PDFIOHelper.java
private static void drawReportHeaderFooter(PDDocument report, Project proj, boolean headerOnFirstPage) { int pageIdx = headerOnFirstPage ? 0 : 1; int marginOffset = 10; try {/*from w w w . java2 s.c om*/ PDPageContentStream cs; for (int p = pageIdx; p < report.getNumberOfPages(); p++) { cs = new PDPageContentStream(report, report.getPage(p), true, false); cs.setFont(PDType1Font.TIMES_ROMAN, 11); cs.setNonStrokingColor(Color.BLACK); cs.beginText(); String dateString = DateFormat.getDateInstance(DateFormat.MEDIUM) .format(Calendar.getInstance().getTime()); cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_LEFT_X, MARGIN_TOP_Y + marginOffset)); cs.showText(dateString); String projectString = "WZ ITS Tool Report: " + proj.getName(); cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_RIGHT_X - (PDType1Font.TIMES_ROMAN.getStringWidth(projectString) / 1000 * 11), MARGIN_TOP_Y + marginOffset)); cs.showText(projectString); String pageNumString = "Page " + String.valueOf(p + 1) + " of " + String.valueOf(report.getNumberOfPages()); cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_LEFT_X + (MARGIN_RIGHT_X - MARGIN_LEFT_X) / 2.0f - (PDType1Font.TIMES_ROMAN.getStringWidth(pageNumString) / 1000 * 11) / 2.0f, MARGIN_BOTTOM_Y - marginOffset)); cs.showText(pageNumString); cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_LEFT_X + 20, MARGIN_BOTTOM_Y - marginOffset)); cs.showText("WZ ITS Tool V" + WZITS_FX.VERSION); String analystAgencyStr = (proj.getAnalyst() != null ? proj.getAnalyst() : "") + (proj.getAnalyst() != null && proj.getAgency() != null ? " / " : "") + (proj.getAgency() != null ? 
proj.getAgency() : ""); cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_RIGHT_X - (PDType1Font.TIMES_ROMAN.getStringWidth(analystAgencyStr) / 1000 * 11), MARGIN_BOTTOM_Y - marginOffset)); cs.showText(analystAgencyStr); cs.endText(); BufferedImage logoWZITS = ImageIO.read(WZITS_FX.class.getResource("/GUI/Icon/wzits_icon_64.png")); //ColorConvertOp op = new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null); //op.filter(logoWZITS, logoWZITS); cs.drawImage(LosslessFactory.createFromImage(report, logoWZITS), MARGIN_LEFT_X, MARGIN_BOTTOM_Y - marginOffset - 3, 16, 16); cs.close(); } } catch (IOException e) { System.out.println("Something went wrong"); } }
From source file:hrpod.tools.PDFTools.java
public String getStringFromPDF(InputStream inputStream) { String text = null;//from w ww .j av a2 s . c o m try { COSDocument cosDoc = getParser(inputStream).getDocument(); PDFTextStripper pdfStripper = new PDFTextStripper(); PDDocument pdDoc = new PDDocument(cosDoc); pdfStripper.setStartPage(1); pdfStripper.setEndPage(pdDoc.getNumberOfPages()); text = pdfStripper.getText(pdDoc); } catch (Exception ex) { logger.error("ERROR", ex); } return text; }
From source file:hrpod.tools.PDFTools.java
public String[] getPagesFromPDF(InputStream inputStream) { String[] pages = null;//from w ww . j a v a 2 s . c o m try { COSDocument cosDoc = getParser(inputStream).getDocument(); PDFTextStripper pdfStripper = new PDFTextStripper(); PDDocument pdDoc = new PDDocument(cosDoc); int pagesCount = pdDoc.getNumberOfPages(); pages = new String[pagesCount]; for (int p = 1; p < pagesCount; p++) { pdfStripper.setStartPage(p); pdfStripper.setEndPage(p + 1); pages[p - 1] = pdfStripper.getText(pdDoc); } } catch (IOException e) { logger.error("IO ERROR", e); } catch (Exception ex) { logger.error("ERROR", ex); } return pages; }
From source file:idp.pdf_converter.java
public static void pdf_converter(File file) throws IOException { PDDocument document = PDDocument.load(file); PDFRenderer pdfRenderer = new PDFRenderer(document); String path = System.getProperty("user.dir") + "\\src\\main\\temp\\images\\"; for (int page = 0; page < document.getNumberOfPages(); ++page) { BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.GRAY); ImageIOUtil.writeImage(bim, path + file.getName() + "-" + (page + 1) + ".png", 300); }//ww w . j av a 2 s. com document.close(); }
From source file:indexer.Indexer.java
/**
 * Returns the number of pages of the PDF file at the given location.
 *
 * @param fileLoc path of the PDF file
 * @return the document's page count
 * @throws IOException if the document cannot be loaded or closed
 */
public static int getPDFPages(String fileLoc) throws IOException {
    // The document is only needed for its page count; close it instead of leaking it.
    try (PDDocument doc = PDDocument.load(new File(fileLoc))) {
        return doc.getNumberOfPages();
    }
}