List of usage examples for org.apache.poi.hwpf.extractor.WordExtractor#getEndnoteText()
public String[] getEndnoteText()
From source file:org.olat.search.service.document.file.WordDocument.java
License:Apache License
private void collectWordDocument(final POIFSFileSystem filesystem, final StringBuilder sb) throws IOException { final WordExtractor extractor = new WordExtractor(filesystem); addTextIfAny(sb, extractor.getHeaderText()); for (final String paragraph : extractor.getParagraphText()) { sb.append(paragraph).append(' '); }//from w w w . ja v a 2 s . c om for (final String paragraph : extractor.getFootnoteText()) { sb.append(paragraph).append(' '); } for (final String paragraph : extractor.getCommentsText()) { sb.append(paragraph).append(' '); } for (final String paragraph : extractor.getEndnoteText()) { sb.append(paragraph).append(' '); } addTextIfAny(sb, extractor.getFooterText()); }
From source file:uk.ac.liverpool.MSOffice.MSWord.java
License:Open Source License
private String toHTML(INode parent) { HWPFDocument wor = (HWPFDocument) parent.getDocument().getValue("worddoc"); WordExtractor wx = new WordExtractor(wor); StringBuilder b = new StringBuilder(); b.append("<html><head>" + "<META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" + "<style type=\"text/css\">\n" + "body {\n" + " color: black; background-color: white;\n" + " font-size: 14pts;\n" + " padding: 10px;}\n" + "\n" + "a:link { color: blue; }\n" + "a:visited { color: magenta; }\n" + "a:hover { color: red; }\n" + "a:active { color: red; }\n" + "\n" + "a:link, a:visited, \n" + "a:active, a:hover {\n" + " text-decoration: underline;\n" + "}\n" + "\n" + "p {\n" + " margin-top: 10px;\n" + "}\n" + "text { padding: 5px; }\n" + "\n" + "pre { font-family: monospace; }\n" + "\n\n" + "h1 { font-size: 24pt; font-weight: bold; margin: 10px 0px; }\n" + "h2 { font-size: 18pt; font-weight: bold; margin: 9px 0px; }\n" + "h3 { font-size: 14pt; font-weight: bold; margin: 7px 0px; }\n" + "h4 { font-size: 12pt; font-weight: bold; margin: 6px 0px; }\n" + "h5 { font-size: 10pt; font-weight: bold; margin: 5px 0px; }\n" + "h6 { font-size: 9pt; font-weight: bold; margin: 5px 0px; }\n" + "" + "" + "</style>"); b.append("<title>").append("Text extracion contents of the word document (APACHE POI):").append("</title>"); b.append("</head>\n"); b.append("<body>\n"); b.append("<p>").append(wx.getHeaderText()).append("</p>\n"); ArrayList<String> text = new ArrayList<String>(); text.addAll(Arrays.asList(wx.getParagraphText())); text.addAll(Arrays.asList(wx.getFootnoteText())); text.addAll(Arrays.asList(wx.getEndnoteText())); for (String p : text) { b.append("<p>").append(p).append("</p>\n"); }// w w w.j a va2s. c o m b.append("</body></html>"); return b.toString(); }