// Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.mycompany.textextract;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * Takes a folder of PDF documents and a target folder, and writes the text
 * content of each document to {@code <target>/<original file name>.txt},
 * encoded as UTF-8.
 *
 * <p>If the target folder already contains a text file with that name, the
 * source file is skipped. Only the immediate children of the source folder are
 * considered; sub-directories are ignored.
 *
 * <p>Usage: {@code textExtract <pdf-folder> <target-folder>}
 *
 * @author sschick
 */
public class textExtract {

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the folder containing the PDF files,
     *             {@code args[1]} is the folder that receives the text files
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException if either folder cannot be listed (for example it
     *                     does not exist or is not a directory)
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "usage: textExtract <pdf-folder> <target-folder>");
        }
        File folder = new File(args[0]);
        File target = new File(args[1]);

        // File.list() returns null (not an empty array) when the path is not a
        // readable directory; fail with a clear message instead of an NPE.
        String[] files = folder.list();
        if (files == null) {
            throw new IOException("cannot list source folder: " + folder.getAbsolutePath());
        }
        String[] existing = target.list();
        if (existing == null) {
            throw new IOException("cannot list target folder: " + target.getAbsolutePath());
        }

        // Snapshot the target folder once; nothing written during this run can
        // collide with a later source file, because source names are unique.
        Set<String> alreadyExtracted = new HashSet<String>(Arrays.asList(existing));

        for (String filename : files) {
            String textName = filename + ".txt";
            // Resolve against the source folder: folder.list() yields bare
            // names, which would otherwise be looked up in the working directory.
            File pdfFile = new File(folder, filename);
            if (!pdfFile.isFile() || alreadyExtracted.contains(textName)) {
                continue;
            }
            try {
                extract(pdfFile, new File(target, textName));
            } catch (Exception e) {
                // Best effort: one unreadable document must not stop the batch.
                System.out.println("writing failed-" + e.toString());
            }
        }
    }

    /**
     * Extracts the text of one PDF and writes it to {@code textFile} as UTF-8.
     *
     * <p>The text is extracted before the output file is opened, so a PDF that
     * cannot be parsed does not leave behind an empty text file that would
     * cause the document to be skipped on the next run.
     *
     * @param pdfFile  the PDF document to read
     * @param textFile the text file to create
     * @throws IOException if the PDF cannot be read or the text cannot be written
     */
    private static void extract(File pdfFile, File textFile) throws IOException {
        PDDocument doc = PDDocument.load(pdfFile.getAbsolutePath());
        try {
            String text = new PDFTextStripper().getText(doc);
            PrintWriter writer = new PrintWriter(textFile, "UTF-8");
            try {
                writer.print(text);
                // PrintWriter never throws on write failure; it only records it.
                if (writer.checkError()) {
                    throw new IOException("error while writing " + textFile.getAbsolutePath());
                }
            } finally {
                writer.close();
            }
        } finally {
            doc.close();
        }
    }
}