com.enonic.cms.plugin.extractor.PdfExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.enonic.cms.plugin.extractor.PdfExtractor.java

Source

/*
 * Copyright 2000-2011 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.cms.plugin.extractor;

import java.io.IOException;
import java.io.InputStream;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.enonic.cms.api.plugin.ext.TextExtractor;

/**
 * {@link TextExtractor} implementation that pulls the plain text out of PDF
 * documents using PDFBox.
 */
public class PdfExtractor extends TextExtractor {
    /** The only MIME type this extractor accepts. */
    private static final String PDF_MIME_TYPE = "application/pdf";

    /**
     * Tells whether this extractor supports the given MIME type.
     *
     * @param mimeType the MIME type to check; may be {@code null}
     * @return {@code true} only when {@code mimeType} is exactly
     *         {@code "application/pdf"}, {@code false} otherwise (including
     *         for {@code null})
     */
    @Override
    public boolean canHandle(String mimeType) {
        // Constant-first comparison keeps this null-safe.
        return PDF_MIME_TYPE.equals(mimeType);
    }

    /**
     * Extracts the text content of a PDF read from the given stream.
     *
     * @param mimeType    the MIME type of the content in {@code inputStream}
     * @param inputStream the stream to load the PDF document from; this method
     *                    does not explicitly close it
     * @param encoding    not used by this implementation
     * @return the text produced by {@link PDFTextStripper#getText(PDDocument)},
     *         or {@code null} when {@code mimeType} is not handled by this
     *         extractor
     * @throws IOException if loading the document, extracting its text, or
     *                     closing the document fails
     */
    @Override
    public String extractText(String mimeType, InputStream inputStream, String encoding) throws IOException {
        if (!canHandle(mimeType)) {
            return null;
        }

        PDDocument doc = PDDocument.load(inputStream);
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(doc);
        } finally {
            // Always release the document, even when text extraction throws;
            // otherwise the loaded document would be leaked on failure.
            doc.close();
        }
    }
}