org.deepfs.fsml.xdcr.PDFTransducer.java Source code

Java tutorial

Introduction

Here is the source code for org.deepfs.fsml.xdcr.PDFTransducer.java

Source

/**
 *
 */
package org.deepfs.fsml.xdcr;

import java.io.IOException;
import java.io.StringWriter;
import java.text.SimpleDateFormat;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * This class implements metadata extraction for PDF documents.
 *
 * <p>{@link #read(String)} loads a PDF with Apache PDFBox and renders its
 * document information fields plus the text of its leading pages through
 * {@code keyValue(...)}. The remaining {@code Transducer} operations
 * ({@link #write(String, String)}, {@link #template()}, {@link #rng()}) are
 * not implemented yet and return fixed placeholder values.
 *
 * @author Copyright (c) 2011, michael, ISC license
 *
 */
public class PDFTransducer extends Transducer {

    /** Value passed to the text stripper as its end page; bounds how much text is extracted. */
    private static final int NO_PAGES = 2;

    /** Prefix prepended to every key emitted by {@link #read(String)}. */
    private static final String NS = "fsdoc:";

    /**
     * Creates a PDF transducer, passing {@code "application/pdf"} to the
     * {@code Transducer} constructor (presumably the handled media type;
     * confirm against {@code Transducer}).
     */
    public PDFTransducer() {
        super("application/pdf");
    }

    /**
     * Extracts document information and leading text content from a PDF file.
     *
     * <p>The result is the concatenation of {@code keyValue(...)} entries for
     * title, subject, creator, author, producer, date, content and keywords,
     * in that order, each key prefixed with {@code "fsdoc:"}. The date entry
     * receives {@code null} when the document has no creation date. How
     * {@code keyValue} renders {@code null} values is defined outside this
     * class.
     *
     * <p>Extraction is best effort: failures are reported on standard error
     * and whatever had been collected up to that point is returned (an empty
     * string if loading or text extraction failed).
     *
     * @param path location of the PDF document to read
     * @return the collected key/value entries, possibly empty; never
     *         {@code null}
     * @see org.deepfs.fsml.xdcr.Transducer#read(java.lang.String)
     */
    @Override
    public String read(final String path) {
        PDDocument pd = null;
        final StringWriter sw = new StringWriter();
        final StringBuilder sb = new StringBuilder(128);
        try {
            pd = PDDocument.load(path);
            final PDDocumentInformation info = pd.getDocumentInformation();

            // Extract the text first: if the stripper fails, nothing is
            // emitted at all (same behavior as before).
            final PDFTextStripper stripper = new PDFTextStripper();
            stripper.setEndPage(NO_PAGES);
            stripper.writeText(pd, sw);

            sb.append(keyValue(NS + "title", info.getTitle()));
            sb.append(keyValue(NS + "subject", info.getSubject()));
            sb.append(keyValue(NS + "creator", info.getCreator()));
            sb.append(keyValue(NS + "author", info.getAuthor()));
            sb.append(keyValue(NS + "producer", info.getProducer()));
            // Formatted with the default SimpleDateFormat pattern and locale.
            sb.append(keyValue(NS + "date",
                    info.getCreationDate() != null ? new SimpleDateFormat().format(info.getCreationDate().getTime())
                            : null));
            sb.append(keyValue(NS + "content", sw.getBuffer().toString()));

            sb.append(keyValue(NS + "keywords", info.getKeywords()));
        } catch (IOException e) {
            e.printStackTrace();
        } catch (IndexOutOfBoundsException oe) {
            // [MS] thanks apache pdfbox :-)
            System.err.println(oe.getMessage());
        } finally {
            // Always release the document; previously it was never closed,
            // leaking its underlying resources on every call.
            if (pd != null) {
                try {
                    pd.close();
                } catch (IOException e) {
                    // Closing is best effort as well; the extracted data is still valid.
                    e.printStackTrace();
                }
            }
        }
        return sb.toString();
    }

    /**
     * Not implemented: writing metadata back to a PDF is unsupported.
     *
     * @param path location of the PDF document (ignored)
     * @param xml metadata to write (ignored)
     * @return always {@code false}
     * @see org.deepfs.fsml.xdcr.Transducer#write(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public boolean write(final String path, final String xml) {
        // TODO not implemented yet
        return false;
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     * @see org.deepfs.fsml.xdcr.Transducer#template()
     */
    @Override
    public String template() {
        // TODO not implemented yet
        return null;
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     * @see org.deepfs.fsml.xdcr.Transducer#rng()
     */
    @Override
    public String rng() {
        // TODO not implemented yet
        return null;
    }

}