Example usage for org.apache.poi.hwpf.extractor WordExtractor getFootnoteText

List of usage examples for org.apache.poi.hwpf.extractor WordExtractor getFootnoteText

Introduction

On this page you can find example usages of org.apache.poi.hwpf.extractor.WordExtractor.getFootnoteText().

Prototype

public String[] getFootnoteText() 

Source Link

Usage

From source file:org.olat.search.service.document.file.WordDocument.java

License:Apache License

/**
 * Appends the text of a Word document to {@code sb}.
 *
 * <p>Sections are written in this order: header, body paragraphs, footnotes,
 * comments, endnotes, footer. Header and footer are handed to
 * {@code addTextIfAny}; every entry of the other sections is appended
 * followed by a single space.
 *
 * @param filesystem the POIFS file system the extractor is built from
 * @param sb         the builder that receives the extracted text
 * @throws IOException declared by this method's signature
 */
private void collectWordDocument(final POIFSFileSystem filesystem, final StringBuilder sb) throws IOException {
    final WordExtractor wordExtractor = new WordExtractor(filesystem);

    addTextIfAny(sb, wordExtractor.getHeaderText());

    // Body paragraphs.
    final String[] bodyParagraphs = wordExtractor.getParagraphText();
    for (int i = 0; i < bodyParagraphs.length; i++) {
        sb.append(bodyParagraphs[i]);
        sb.append(' ');
    }

    // Footnotes.
    final String[] footnotes = wordExtractor.getFootnoteText();
    for (int i = 0; i < footnotes.length; i++) {
        sb.append(footnotes[i]);
        sb.append(' ');
    }

    // Comments.
    final String[] comments = wordExtractor.getCommentsText();
    for (int i = 0; i < comments.length; i++) {
        sb.append(comments[i]);
        sb.append(' ');
    }

    // Endnotes.
    final String[] endnotes = wordExtractor.getEndnoteText();
    for (int i = 0; i < endnotes.length; i++) {
        sb.append(endnotes[i]);
        sb.append(' ');
    }

    addTextIfAny(sb, wordExtractor.getFooterText());
}

From source file:uk.ac.liverpool.MSOffice.MSWord.java

License:Open Source License

/**
 * Renders the text content of the Word document attached to {@code parent}
 * as a standalone HTML page.
 *
 * <p>The document is read from the {@code "worddoc"} value of
 * {@code parent.getDocument()}. The page consists of a fixed stylesheet and
 * title, followed by one {@code <p>} element for the header text and one per
 * entry of the body paragraphs, footnotes and endnotes, in that order.
 *
 * <p>All extracted text is HTML-escaped before insertion, so characters such
 * as {@code <} or {@code &} in the document show up as text instead of
 * being interpreted as markup.
 *
 * @param parent node whose document carries the {@code "worddoc"} value
 * @return the complete HTML page as a string
 */
private String toHTML(INode parent) {
    HWPFDocument wor = (HWPFDocument) parent.getDocument().getValue("worddoc");
    WordExtractor wx = new WordExtractor(wor);
    StringBuilder b = new StringBuilder();

    // NOTE(review): "14pts" below is not a valid CSS length unit ("pt" is);
    // left untouched here because changing it would alter rendering.
    b.append("<html><head>" + "<META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">"
            + "<style type=\"text/css\">\n" + "body {\n" + "   color: black; background-color: white;\n"
            + "   font-size: 14pts;\n" + "   padding: 10px;}\n" + "\n" + "a:link { color: blue; }\n"
            + "a:visited { color: magenta; }\n" + "a:hover { color: red; }\n" + "a:active { color: red; }\n"
            + "\n" + "a:link, a:visited, \n" + "a:active, a:hover {\n" + "   text-decoration: underline;\n"
            + "}\n" + "\n" + "p {\n" + "   margin-top: 10px;\n" + "}\n" + "text { padding: 5px; }\n" + "\n"
            + "pre { font-family: monospace; }\n" + "\n\n"
            + "h1 { font-size: 24pt; font-weight: bold; margin: 10px 0px; }\n"
            + "h2 { font-size: 18pt; font-weight: bold; margin: 9px 0px; }\n"
            + "h3 { font-size: 14pt; font-weight: bold; margin: 7px 0px; }\n"
            + "h4 { font-size: 12pt; font-weight: bold; margin: 6px 0px; }\n"
            + "h5 { font-size: 10pt; font-weight: bold; margin: 5px 0px; }\n"
            + "h6 { font-size:  9pt; font-weight: bold; margin: 5px 0px; }\n" + "</style>");
    b.append("<title>").append("Text extracion contents of the word document (APACHE POI):").append("</title>");
    b.append("</head>\n");
    b.append("<body>\n");

    b.append("<p>").append(escapeHtml(wx.getHeaderText())).append("</p>\n");

    // Fetch all sections before emitting any of them, as the original did.
    String[] paragraphs = wx.getParagraphText();
    String[] footnotes = wx.getFootnoteText();
    String[] endnotes = wx.getEndnoteText();
    appendParagraphs(b, paragraphs);
    appendParagraphs(b, footnotes);
    appendParagraphs(b, endnotes);

    b.append("</body></html>");
    return b.toString();
}

/** Appends each entry of {@code paragraphs} to {@code out} as an HTML-escaped {@code <p>} element. */
private static void appendParagraphs(StringBuilder out, String[] paragraphs) {
    for (String p : paragraphs) {
        out.append("<p>").append(escapeHtml(p)).append("</p>\n");
    }
}

/**
 * Replaces the HTML-significant characters {@code & < > "} with their
 * character entities. Returns an empty string for {@code null}.
 */
private static String escapeHtml(String text) {
    if (text == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '&':
            escaped.append("&amp;");
            break;
        case '<':
            escaped.append("&lt;");
            break;
        case '>':
            escaped.append("&gt;");
            break;
        case '"':
            escaped.append("&quot;");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}