List of usage examples for org.apache.poi.hwpf.usermodel.CharacterRun.isStrikeThrough()
public boolean isStrikeThrough()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java
License:Apache License
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException { // Skip trailing newlines if (!isRendered(cr) || cr.text().equals("\r")) return;//from w w w. j a v a 2 s. c om if (!skipStyling) { if (cr.isBold() != curBold) { // Enforce nesting -- must close s and i tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (cr.isBold()) { xhtml.startElement("b"); } else { xhtml.endElement("b"); } curBold = cr.isBold(); } if (cr.isItalic() != curItalic) { // Enforce nesting -- must close s tag if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (cr.isItalic()) { xhtml.startElement("i"); } else { xhtml.endElement("i"); } curItalic = cr.isItalic(); } if (cr.isStrikeThrough() != curStrikeThrough) { if (cr.isStrikeThrough()) { xhtml.startElement("s"); } else { xhtml.endElement("s"); } curStrikeThrough = cr.isStrikeThrough(); } } // Clean up the text String text = cr.text(); text = text.replace('\r', '\n'); if (text.endsWith("\u0007")) { // Strip the table cell end marker text = text.substring(0, text.length() - 1); } // Copied from POI's // org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters: // line tabulator as break line text = text.replace((char) 0x000b, '\n'); // Non-breaking hyphens are returned as char 30 text = text.replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN); // Non-required hyphens to zero-width space text = text.replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE); // TODO: mj xhtml.characters(text); }
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException { // Skip trailing newlines if (!isRendered(cr) || cr.text().equals("\r")) return;//w w w . ja v a 2 s .c o m if (!skipStyling) { if (cr.isBold() != curBold) { // Enforce nesting -- must close s and i tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (cr.isBold()) { xhtml.startElement("b"); } else { xhtml.endElement("b"); } curBold = cr.isBold(); } if (cr.isItalic() != curItalic) { // Enforce nesting -- must close s tag if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (cr.isItalic()) { xhtml.startElement("i"); } else { xhtml.endElement("i"); } curItalic = cr.isItalic(); } if (cr.isStrikeThrough() != curStrikeThrough) { if (cr.isStrikeThrough()) { xhtml.startElement("s"); } else { xhtml.endElement("s"); } curStrikeThrough = cr.isStrikeThrough(); } } // Clean up the text String text = cr.text(); text = text.replace('\r', '\n'); if (text.endsWith("\u0007")) { // Strip the table cell end marker text = text.substring(0, text.length() - 1); } // Copied from POI's org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters: // Non-breaking hyphens are returned as char 30 text = text.replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN); // Non-required hyphens to zero-width space text = text.replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE); // Control characters as line break text = text.replaceAll("[\u0000-\u001f]", "\n"); xhtml.characters(text); }