Example usage for org.apache.poi.hwpf.usermodel CharacterRun isStrikeThrough

List of usage examples for org.apache.poi.hwpf.usermodel.CharacterRun.isStrikeThrough()

Introduction

On this page you can find example usages of org.apache.poi.hwpf.usermodel.CharacterRun.isStrikeThrough().

Prototype

public boolean isStrikeThrough() 

Source Link

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Writes the text of a single character run to the XHTML output, first
 * opening or closing {@code b}, {@code i} and {@code s} elements so that
 * they match the run's bold, italic and strike-through flags.
 *
 * <p>The elements are kept nested in the order b, i, s (outermost first).
 * The {@code curBold}, {@code curItalic} and {@code curStrikeThrough}
 * fields record which of them are currently open and are updated here.
 *
 * @param cr          the run to output
 * @param skipStyling when {@code true}, no formatting elements are touched
 * @param xhtml       handler that receives the elements and the text
 * @throws SAXException propagated from the handler calls
 */
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml)
        throws SAXException {
    // Runs that are not rendered, or that hold only a carriage return, produce no output
    if (!isRendered(cr)) {
        return;
    }
    if (cr.text().equals("\r")) {
        return;
    }

    if (!skipStyling) {
        final boolean bold = cr.isBold();
        if (bold != curBold) {
            // b is the outermost element: unwind s, then i, before touching it
            if (curStrikeThrough) {
                xhtml.endElement("s");
                curStrikeThrough = false;
            }
            if (curItalic) {
                xhtml.endElement("i");
                curItalic = false;
            }
            if (bold) {
                xhtml.startElement("b");
            } else {
                xhtml.endElement("b");
            }
            curBold = bold;
        }

        final boolean italic = cr.isItalic();
        if (italic != curItalic) {
            // i encloses s, so an open s has to be closed first
            if (curStrikeThrough) {
                xhtml.endElement("s");
                curStrikeThrough = false;
            }
            if (italic) {
                xhtml.startElement("i");
            } else {
                xhtml.endElement("i");
            }
            curItalic = italic;
        }

        final boolean strike = cr.isStrikeThrough();
        if (strike != curStrikeThrough) {
            // s is innermost; nothing else needs closing
            if (strike) {
                xhtml.startElement("s");
            } else {
                xhtml.endElement("s");
            }
            curStrikeThrough = strike;
        }
    }

    // Carriage returns become line feeds
    String cleaned = cr.text().replace('\r', '\n');

    // A trailing BEL (0x07) is the table cell end marker; drop it
    if (cleaned.endsWith("\u0007")) {
        cleaned = cleaned.substring(0, cleaned.length() - 1);
    }

    // The substitutions below are taken from POI's
    // org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters
    cleaned = cleaned
            // line tabulation (0x0b) acts as a line break
            .replace((char) 0x000b, '\n')
            // char 30 is how non-breaking hyphens are returned
            .replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN)
            // char 31 (non-required hyphen) becomes a zero-width space
            .replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE);

    xhtml.characters(cleaned);
}

From source file:org.apache.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Sends the text of one character run to the XHTML handler, adjusting the
 * open {@code b}, {@code i} and {@code s} elements beforehand so that they
 * reflect the run's bold, italic and strike-through state.
 *
 * <p>Nesting order is b, then i, then s. Whenever an outer element has to
 * change, the inner ones that are open are closed first. The
 * {@code curBold}, {@code curItalic} and {@code curStrikeThrough} fields
 * hold the currently open state and are updated by this method.
 *
 * @param cr          the run whose text is written
 * @param skipStyling if {@code true}, formatting elements are left untouched
 * @param xhtml       receiver of the generated elements and characters
 * @throws SAXException propagated from the handler calls
 */
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml)
        throws SAXException {
    // No output for runs that are not rendered or consist of a single carriage return
    if (!isRendered(cr)) {
        return;
    }
    if (cr.text().equals("\r")) {
        return;
    }

    if (!skipStyling) {
        final boolean runBold = cr.isBold();
        if (runBold != curBold) {
            // Changing b requires the inner s and i elements to be closed first
            if (curStrikeThrough) {
                xhtml.endElement("s");
                curStrikeThrough = false;
            }
            if (curItalic) {
                xhtml.endElement("i");
                curItalic = false;
            }
            if (runBold) {
                xhtml.startElement("b");
            } else {
                xhtml.endElement("b");
            }
            curBold = runBold;
        }

        final boolean runItalic = cr.isItalic();
        if (runItalic != curItalic) {
            // Changing i requires the inner s element to be closed first
            if (curStrikeThrough) {
                xhtml.endElement("s");
                curStrikeThrough = false;
            }
            if (runItalic) {
                xhtml.startElement("i");
            } else {
                xhtml.endElement("i");
            }
            curItalic = runItalic;
        }

        final boolean runStrike = cr.isStrikeThrough();
        if (runStrike != curStrikeThrough) {
            if (runStrike) {
                xhtml.startElement("s");
            } else {
                xhtml.endElement("s");
            }
            curStrikeThrough = runStrike;
        }
    }

    // Normalise carriage returns to line feeds
    String cleaned = cr.text().replace('\r', '\n');

    // A trailing BEL (0x07) marks the end of a table cell; remove it
    if (cleaned.endsWith("\u0007")) {
        cleaned = cleaned.substring(0, cleaned.length() - 1);
    }

    // Hyphen handling is taken from POI's
    // org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters
    cleaned = cleaned
            // char 30 is how non-breaking hyphens are returned
            .replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN)
            // char 31 (non-required hyphen) becomes a zero-width space
            .replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE)
            // every control character still present is emitted as a line break
            .replaceAll("[\u0000-\u001f]", "\n");

    xhtml.characters(cleaned);
}