List of usage examples for org.apache.pdfbox.text PDFTextStripper setPageStart
public void setPageStart(String pageStartValue)
From source file: extractor.pdftotext.PdfToText.java
private String getPdfBoxRaw(File file) { try {/* ww w . ja v a 2s .c om*/ PDDocument doc = PDDocument.load(file); PDFTextStripper stripper = new PDFTextStripper(); stripper.setPageStart("PAGE START"); stripper.setPageEnd("PAGE END"); //gets the text form the doc and replaces unknown signs with \n String rawText = stripper.getText(doc).replaceAll("[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cn}]", "\n"); doc.close(); return rawText; } catch (IOException ex) { Logger.getLogger(PdfToText.class.getName()).log(Level.SEVERE, null, ex); } return ""; }