Example usage for org.apache.poi.hwpf.extractor WordExtractor getFooterText

List of usage examples for org.apache.poi.hwpf.extractor WordExtractor getFooterText

Introduction

On this page you can find example usages of org.apache.poi.hwpf.extractor.WordExtractor.getFooterText().

Prototype

@Deprecated
public String getFooterText() 

Source Link

Document

Grab the text from the footers

Usage

From source file: net.yacy.document.parser.docParser.java

License:Open Source License

/**
 * Parses a binary Word document read from {@code source} into a one-element
 * {@code Document} array.
 *
 * <p>The indexed text is the trimmed body text followed by the header text and
 * the footer text, separated by single spaces. The title is derived from the
 * start of that text: at most 240 characters are taken, CR/LF/TAB are turned
 * into spaces, the result is capped at 80 characters and runs of spaces are
 * then collapsed.
 *
 * <p>Keywords, subject, author and company are read from the document's summary
 * property sets. Those property sets may be absent, in which case the
 * corresponding metadata is passed on as {@code null} / left empty instead of
 * failing with a {@code NullPointerException}.
 *
 * @param location       URL of the document; passed to the resulting {@code Document}
 *                       and to any {@code Parser.Failure} raised here
 * @param mimeType       MIME type recorded on the resulting {@code Document}
 * @param charset        not used by this method; the result is always declared as "UTF-8"
 * @param scraper        not used by this method
 * @param timezoneOffset not used by this method
 * @param source         stream the Word document is read from
 * @return an array holding exactly one {@code Document}
 * @throws Parser.Failure if the extractor cannot be created or the text cannot be extracted
 * @throws InterruptedException declared by the signature; not thrown by the code in this method
 */
@SuppressWarnings("deprecation")
@Override
public Document[] parse(final AnchorURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, final InputStream source)
        throws Parser.Failure, InterruptedException {

    final WordExtractor extractor;

    try {
        extractor = new WordExtractor(source);
    } catch (final Exception e) {
        throw new Parser.Failure("error in docParser, WordTextExtractorFactory: " + e.getMessage(), location);
    }

    final StringBuilder contents = new StringBuilder(80);
    try {
        contents.append(extractor.getText().trim());
        contents.append(' ');
        contents.append(extractor.getHeaderText());
        contents.append(' ');
        // getFooterText() is deprecated, hence the @SuppressWarnings on this method
        contents.append(extractor.getFooterText());
    } catch (final Exception e) {
        throw new Parser.Failure("error in docParser, getText: " + e.getMessage(), location);
    }

    // derive a title from the beginning of the extracted text
    String title = (contents.length() > 240) ? contents.substring(0, 240) : contents.toString().trim();
    title = title.replaceAll("\r", " ").replaceAll("\n", " ").replaceAll("\t", " ").trim();
    if (title.length() > 80) {
        title = title.substring(0, 80);
    }
    // collapse runs of spaces: repeat until a pass no longer shortens the title
    int l = title.length();
    while (true) {
        title = title.replaceAll("  ", " ");
        if (title.length() == l) {
            break;
        }
        l = title.length();
    }

    // The summary property sets are optional in a Word file; the accessors return null
    // when they are missing or unreadable, so every read below is guarded.
    final boolean hasSummary = extractor.getSummaryInformation() != null;
    final boolean hasDocSummary = extractor.getDocSummaryInformation() != null;

    // get keywords (for yacy as array)
    final String keywords = hasSummary ? extractor.getSummaryInformation().getKeywords() : null;
    final String[] keywlist;
    if (keywords != null && !keywords.isEmpty()) {
        keywlist = CommonPattern.COMMA.split(keywords);
    } else {
        keywlist = null;
    }

    final String subject = hasSummary ? extractor.getSummaryInformation().getSubject() : null;
    List<String> descriptions = new ArrayList<String>();
    if (subject != null && !subject.isEmpty()) {
        descriptions.add(subject);
    }

    final String author = hasSummary ? extractor.getSummaryInformation().getAuthor() : null;
    // NOTE(review): a null publisher is assumed to be acceptable to the Document
    // constructor, as it is for the author - confirm against Document
    final String company = hasDocSummary ? extractor.getDocSummaryInformation().getCompany() : null;

    Document[] docs;
    docs = new Document[] { new Document(location, mimeType, "UTF-8", this, null, keywlist, singleList(title),
            author, // constructor can handle null
            company, // publisher
            null, descriptions, 0.0f, 0.0f, contents.toString(), null, null, null, false, new Date()) };

    return docs;
}

From source file: org.olat.search.service.document.file.WordDocument.java

License:Apache License

/**
 * Collects the textual parts of a Word document into {@code sb}.
 *
 * <p>The parts are written in this order: header text, body paragraphs,
 * footnotes, comments, endnotes, footer text. Header and footer text go
 * through {@code addTextIfAny}; every paragraph of the other parts is
 * appended followed by a single space.
 *
 * @param filesystem POI filesystem the Word extractor is created from
 * @param sb         buffer that receives the collected text
 * @throws IOException declared for failures while creating or reading the extractor
 */
private void collectWordDocument(final POIFSFileSystem filesystem, final StringBuilder sb) throws IOException {
    final WordExtractor wordExtractor = new WordExtractor(filesystem);

    // header first
    final String headerText = wordExtractor.getHeaderText();
    addTextIfAny(sb, headerText);

    // main body
    final String[] bodyParagraphs = wordExtractor.getParagraphText();
    for (final String text : bodyParagraphs) {
        sb.append(text);
        sb.append(' ');
    }

    // footnotes
    final String[] footnotes = wordExtractor.getFootnoteText();
    for (final String text : footnotes) {
        sb.append(text);
        sb.append(' ');
    }

    // comments
    final String[] comments = wordExtractor.getCommentsText();
    for (final String text : comments) {
        sb.append(text);
        sb.append(' ');
    }

    // endnotes
    final String[] endnotes = wordExtractor.getEndnoteText();
    for (final String text : endnotes) {
        sb.append(text);
        sb.append(' ');
    }

    // footer last
    final String footerText = wordExtractor.getFooterText();
    addTextIfAny(sb, footerText);
}