Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package PDSL;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.RandomAccessFile;

import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Processing PDF to text using pdfbox.
 *
 * <p>Converts every PDF file found directly inside a source directory into a
 * plain-text file of the same base name inside a target directory.
 *
 * @author sidiksoleman
 */
public class PDFProcessor {

    /** Extension (matched case-insensitively) that marks an input file as a PDF. */
    private static final String PDF_EXTENSION = ".pdf";

    /** Extension given to every generated text file. */
    private static final String TEXT_EXTENSION = ".txt";

    /** Explicit output encoding so the result does not depend on the platform default. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /**
     * Extracts the text of each PDF in {@code dirFrom} and writes it to a
     * {@code .txt} file with the same base name in {@code dirTo}.
     *
     * <p>Only regular files whose name ends in {@code .pdf} (any letter case)
     * are processed; sub-directories and other files are skipped. The target
     * directory is not created by this method.
     *
     * @param dirFrom path of the directory containing the PDF files
     * @param dirTo   path of the directory that receives the text files
     * @throws IOException if {@code dirFrom} cannot be listed, a PDF cannot be
     *                     loaded or parsed, or a text file cannot be written
     */
    public void pdfToText(String dirFrom, String dirTo) throws IOException {
        File pdfFolder = new File(dirFrom);
        File[] listOfPDF = pdfFolder.listFiles();
        // listFiles() returns null (not an empty array) when the path is not a
        // directory or cannot be read; report that instead of failing with an NPE.
        if (listOfPDF == null) {
            throw new IOException("Cannot list PDF source directory: " + dirFrom);
        }

        for (File thePDF : listOfPDF) {
            String pdfName = thePDF.getName();
            if (!thePDF.isFile() || !hasPdfExtension(pdfName)) {
                continue;
            }

            String parsedText;
            // Close the document once its text has been extracted so that
            // processing many files does not accumulate open resources.
            try (PDDocument pdDoc = PDDocument.load(thePDF)) {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                parsedText = pdfStripper.getText(pdDoc);
            }

            File textFile = new File(dirTo, toTextFileName(pdfName));
            try (PrintWriter out = new PrintWriter(textFile, OUTPUT_CHARSET)) {
                out.write(parsedText);
                // PrintWriter never throws on write; checkError() flushes and
                // reports whether any write failed.
                if (out.checkError()) {
                    throw new IOException("Failed to write text file: " + textFile);
                }
            }
        }
    }

    /** Returns whether {@code fileName} ends with the PDF extension, ignoring case. */
    private static boolean hasPdfExtension(String fileName) {
        int extensionStart = fileName.length() - PDF_EXTENSION.length();
        return fileName.regionMatches(true, extensionStart, PDF_EXTENSION, 0, PDF_EXTENSION.length());
    }

    /** Replaces only the trailing PDF extension of {@code pdfName} with the text extension. */
    private static String toTextFileName(String pdfName) {
        String baseName = pdfName.substring(0, pdfName.length() - PDF_EXTENSION.length());
        return baseName + TEXT_EXTENSION;
    }

    /**
     * Converts the PDFs in {@code ../testFrom} into text files in {@code ../testTo}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the conversion fails
     */
    public static void main(String[] args) throws IOException {
        PDFProcessor ps = new PDFProcessor();
        ps.pdfToText("../testFrom", "../testTo");
    }
}