Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.ntc.utils;

import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * Reads the text of a PDF file one page per call to {@link #getNextString()}.
 *
 * <p>The document is opened in the constructor and closed automatically once
 * every page has been handed out. If the file cannot be opened, the failure is
 * logged and the extractor behaves as an empty source: {@link #getNextString()}
 * returns {@code null} right away.
 *
 * @author ugr
 */
public class PdfTextExtractor extends TextExtractor {

    private static final Logger LOGGER = Logger.getLogger(PdfTextExtractor.class.getName());

    /** Page count reported by the loaded document; 0 when loading failed. */
    private int numberOfPages;
    private PDFTextStripper stripper;
    /** The open document, or {@code null} if it was never opened or is already closed. */
    private PDDocument document;
    /** Number of the page most recently requested from the stripper; 0 before the first call. */
    private int currentPage;

    /**
     * Opens the given PDF file and prepares it for page-by-page extraction.
     *
     * <p>An {@link IOException} during setup is logged, not thrown. Any
     * document that was already opened is closed again so it does not leak.
     *
     * @param file the PDF file to read
     */
    public PdfTextExtractor(File file) {
        try {
            document = PDDocument.load(file);
            numberOfPages = document.getNumberOfPages();
            stripper = new PDFTextStripper();
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            // The document may have loaded before the stripper failed: release
            // it and reset the page count so getNextString() never touches a
            // null stripper or a half-initialised document.
            closeDocument();
            numberOfPages = 0;
        }
    }

    /**
     * Returns the text of the next page.
     *
     * @return the page text wrapped in a new {@link StringBuilder}, or
     *         {@code null} when all pages have been returned, when the document
     *         could not be opened, or when reading the page failed (the failure
     *         is logged; the document stays open so a later call moves on to
     *         the following page)
     */
    @Override
    public StringBuilder getNextString() {
        if (document == null) {
            // Never opened, or already exhausted and closed.
            return null;
        }
        if (currentPage >= numberOfPages) {
            closeDocument();
            return null;
        }

        // Advance first: the first page requested from the stripper is page 1.
        currentPage++;
        try {
            stripper.setStartPage(currentPage);
            stripper.setEndPage(currentPage);
            String pageText = stripper.getText(document);
            return pageText == null ? null : new StringBuilder(pageText);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /** Closes the document if it is open, logging any failure; safe to call repeatedly. */
    private void closeDocument() {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } finally {
            // Drop the reference so the document is closed at most once.
            document = null;
        }
    }
}