Java tutorial
/*
 * Copyright 2000-2011 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.cms.plugin.extractor;

import java.io.IOException;
import java.io.InputStream;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.enonic.cms.api.plugin.ext.TextExtractor;

/**
 * Text extractor plugin that pulls the plain text out of PDF documents using PDFBox.
 */
public class PdfExtractor
    extends TextExtractor
{
    /** The only MIME type this extractor accepts. */
    private static final String PDF_MIME_TYPE = "application/pdf";

    /**
     * Tells whether this extractor accepts the given MIME type.
     *
     * @param mimeType the MIME type to check; may be {@code null}
     * @return {@code true} only when {@code mimeType} is exactly {@code "application/pdf"}
     */
    @Override
    public boolean canHandle( String mimeType )
    {
        // Constant-first comparison keeps a null mimeType from throwing.
        return PDF_MIME_TYPE.equals( mimeType );
    }

    /**
     * Extracts the text of a PDF document read from the given stream.
     *
     * @param mimeType    the MIME type of the content; checked with {@link #canHandle(String)}
     * @param inputStream the stream the PDF document is loaded from; this method does not close it
     *                    itself (NOTE(review): whether PDFBox closes it is not visible here)
     * @param encoding    not used by this implementation
     * @return the text produced by {@link PDFTextStripper}, or {@code null} when the MIME type
     *         is not handled by this extractor
     * @throws IOException if loading the document, stripping its text or closing it fails
     */
    @Override
    public String extractText( String mimeType, InputStream inputStream, String encoding )
        throws IOException
    {
        if ( !canHandle( mimeType ) )
        {
            return null;
        }

        PDDocument doc = PDDocument.load( inputStream );
        try
        {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText( doc );
        }
        finally
        {
            // Release the document even when text stripping throws.
            doc.close();
        }
    }
}