List of usage examples for com.lowagie.text.pdf.parser.PdfTextExtractor#getTextFromPage
public String getTextFromPage(int page) throws IOException
From source file: de.offis.health.icardea.cied.pdf.extractor.PDFiText2Extractor.java
License: LGPL
public String getText(int pageNumber) throws IOException, Exception { String returnValue = null;// w w w . j a v a 2 s . c o m if (pdfReader != null) { int numberOfPages = getNumberOfPages(); if (pageNumber > 0 && pageNumber <= numberOfPages) { PdfTextExtractor pdfTextExtractor = new PdfTextExtractor(pdfReader); String extractedText = pdfTextExtractor.getTextFromPage(pageNumber); if (extractedText != null && extractedText.trim().length() > 0) { returnValue = PAGE_START_MARKER + extractedText; } // end if } else { // TODO: Add own exception. throw new Exception("The given page number (" + pageNumber + ") " + "is not in the range of valid pages (1.." + numberOfPages + ")."); } // end if..else } else { // TODO: Add own exception. throw new Exception("There is no open PDF to work with."); } // end if..else return returnValue; }
From source file: net.laubenberger.bogatyr.helper.HelperPdf.java
License: Open Source License
/** * Returns the text of a given PDF as {@link String}. * //w w w .j av a2 s . c o m * @param file * input as PDF * @return text of the given PDF * @throws IOException * @see File * @since 0.9.3 */ public static String getText(final File file) throws IOException { // $JUnit$ if (log.isDebugEnabled()) log.debug(HelperLog.methodStart(file)); if (null == file) { throw new RuntimeExceptionIsNull("file"); //$NON-NLS-1$ } final PdfReader pdfReader = new PdfReader(file.getAbsolutePath()); final PdfTextExtractor pdfExtractor = new PdfTextExtractor(pdfReader); final StringBuilder sb = new StringBuilder(); for (int page = 1; page <= pdfReader.getNumberOfPages(); page++) { sb.append(pdfExtractor.getTextFromPage(page)); sb.append(HelperString.NEW_LINE); } final String result = sb.toString(); if (log.isDebugEnabled()) log.debug(HelperLog.methodExit(result)); return result; }