Usage examples for org.apache.pdfbox.text.PDFTextStripper.setPageEnd
public void setPageEnd(String pageEndValue)
From source file: extractor.pdftotext.PdfToText.java
private String getPdfBoxRaw(File file) { try {//w w w. j a v a2 s.c o m PDDocument doc = PDDocument.load(file); PDFTextStripper stripper = new PDFTextStripper(); stripper.setPageStart("PAGE START"); stripper.setPageEnd("PAGE END"); //gets the text form the doc and replaces unknown signs with \n String rawText = stripper.getText(doc).replaceAll("[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cn}]", "\n"); doc.close(); return rawText; } catch (IOException ex) { Logger.getLogger(PdfToText.class.getName()).log(Level.SEVERE, null, ex); } return ""; }