Example usage for org.apache.poi.hwpf.usermodel Paragraph isInList

List of usage examples for org.apache.poi.hwpf.usermodel Paragraph isInList

Introduction

On this page you can find example usages of the isInList() method of org.apache.poi.hwpf.usermodel.Paragraph.

Prototype

public boolean isInList() 

Source Link

Usage

From source file:org.apache.tika.parser.microsoft.ListManager.java

License:Apache License

/**
 * Get the formatted number for a given paragraph.
 * <p><em>Note:</em> This only works correctly if called subsequently for <em>all</em> paragraphs in a valid selection (main document, text field, ...) which are part of a list.</p>
 * <p>The level counter and the override tuples computed for the paragraph are stored back into
 * {@code listLevelMap} and {@code overrideTupleMap}, keyed by the paragraph's lsid and ilfo
 * respectively, so each call advances the state used by the next one.</p>
 *
 * @param paragraph list paragraph to process
 * @return String which represents the numbering of this list paragraph; never {@code null}, can be empty string, though,
 *        if something goes wrong in getList() or if no list data is available for the paragraph's list id
 * @throws IllegalArgumentException If the given paragraph is {@code null} or is not part of a list
 */
public String getFormattedNumber(final Paragraph paragraph) {
    if (paragraph == null) {
        throw new IllegalArgumentException("Given paragraph cannot be null.");
    }
    if (!paragraph.isInList()) {
        throw new IllegalArgumentException("Can only process list paragraphs.");
    }
    //lsid is equivalent to docx's abnum
    //ilfo is equivalent to docx's num
    int currAbNumId = -1;
    try {
        currAbNumId = paragraph.getList().getLsid();
    } catch (NoSuchElementException e) {
        //somewhat frequent exception when initializing HWPFList
        return "";
    } catch (IllegalArgumentException e) {
        // deliberate best-effort: an unreadable list yields no numbering
        return "";
    } catch (NullPointerException e) {
        // deliberate best-effort: an unreadable list yields no numbering
        return "";
    }

    int currNumId = paragraph.getIlfo();
    ParagraphLevelCounter lc = listLevelMap.get(currAbNumId);
    LevelTuple[] overrideTuples = overrideTupleMap.get(currNumId);

    if (lc == null) {
        // Reuse the lsid obtained above instead of calling getList() again outside the try block.
        ListData listData = listTables.getListData(currAbNumId);
        if (listData == null) {
            // No list definition registered for this lsid: nothing to number.
            return "";
        }
        int levelCount = listData.getLevels().length;
        LevelTuple[] levelTuples = new LevelTuple[levelCount];
        for (int i = 0; i < levelCount; i++) {
            levelTuples[i] = buildTuple(i, listData.getLevels()[i]);
        }
        lc = new ParagraphLevelCounter(levelTuples);
    }
    if (overrideTuples == null) {
        overrideTuples = buildOverrideTuples(paragraph, lc.getNumberOfLevels());
    }
    String formattedString = lc.incrementLevel(paragraph.getIlvl(), overrideTuples);

    // Remember the counter state and overrides for subsequent paragraphs of the same list.
    listLevelMap.put(currAbNumId, lc);
    overrideTupleMap.put(currNumId, overrideTuples);
    return formattedString;
}

From source file:org.apache.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Writes a single paragraph to the XHTML output.
 * <p>
 * If the paragraph starts a table nested directly below the top level
 * ({@code parentTableLevel == 0}), the whole table is emitted as
 * {@code table/tbody/tr/td} elements, recursing into the paragraphs of each cell.
 * Otherwise the paragraph is emitted as a single element whose tag and optional
 * {@code class} attribute come from its style, preceded by its list numbering
 * (if it is a list paragraph with a named style) and followed by its character
 * runs (text, fields, pictures).
 *
 * @param p                the paragraph to process
 * @param parentTableLevel table level of the enclosing context; 0 outside any table
 * @param r                the range that contains {@code p}
 * @param document         the document that supplies the style sheet and fields
 * @param docPart          the document part used to look up fields by offset
 * @param pictures         source of inline and floating pictures
 * @param pictureTable     used to test whether a character run carries a picture
 * @param listManager      supplies the formatted list number for list paragraphs
 * @param xhtml            the handler that receives the output
 * @return {@code t.numParagraphs() - 1} when a table was emitted, otherwise 0;
 *         presumably the number of following paragraphs the caller should skip
 *         (verify against the caller)
 * @throws SAXException  if writing to the content handler fails
 * @throws IOException   declared by the helpers called here
 * @throws TikaException declared by the helpers called here
 */
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document,
        FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable,
        ListManager listManager, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
    // Note - a poi bug means we can't currently properly recurse
    //  into nested tables, so currently we don't
    if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) {
        Table t = r.getTable(p);
        xhtml.startElement("table");
        xhtml.startElement("tbody");
        for (int rn = 0; rn < t.numRows(); rn++) {
            TableRow row = t.getRow(rn);
            xhtml.startElement("tr");
            for (int cn = 0; cn < row.numCells(); cn++) {
                TableCell cell = row.getCell(cn);
                xhtml.startElement("td");

                for (int pn = 0; pn < cell.numParagraphs(); pn++) {
                    Paragraph cellP = cell.getParagraph(pn);
                    handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable,
                            listManager, xhtml);
                }
                xhtml.endElement("td");
            }
            xhtml.endElement("tr");
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        return (t.numParagraphs() - 1);
    }

    String text = p.text();
    if (text.replaceAll("[\\r\\n\\s]+", "").isEmpty()) {
        // Skip empty paragraphs
        return 0;
    }

    TagAndStyle tas;
    String numbering = null;

    if (document.getStyleSheet().numStyles() > p.getStyleIndex()) {
        StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (style != null && style.getName() != null && style.getName().length() > 0) {
            if (p.isInList()) {
                numbering = listManager.getFormattedNumber(p);
            }
            tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0));
        } else {
            tas = new TagAndStyle("p", null);
        }
    } else {
        // Style index is outside the style sheet: fall back to a plain paragraph
        tas = new TagAndStyle("p", null);
    }

    if (tas.getStyleClass() != null) {
        xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
    } else {
        xhtml.startElement(tas.getTag());
    }

    if (numbering != null) {
        xhtml.characters(numbering);
    }

    for (int j = 0; j < p.numCharacterRuns(); j++) {
        CharacterRun cr = p.getCharacterRun(j);
        String runText = cr.text();

        // FIELD_BEGIN_MARK:
        // Guard against empty runs, which have no first character to inspect
        if (!runText.isEmpty() && runText.charAt(0) == 0x13) {
            Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset());
            // 58 is an embedded document
            // 56 is a document link
            if (field != null && (field.getType() == 58 || field.getType() == 56)) {
                // A field without a separator mark has no separator run to take
                // the picture offset from, so no marker can be written for it
                CharacterRun separatorRun = field.getMarkSeparatorCharacterRun(r);
                if (separatorRun != null) {
                    // Embedded Object: add a <div
                    // class="embedded" id="_X"/> so consumer can see where
                    // in the main text each embedded document
                    // occurred:
                    String id = "_" + separatorRun.getPicOffset();
                    AttributesImpl attributes = new AttributesImpl();
                    attributes.addAttribute("", "class", "class", "CDATA", "embedded");
                    attributes.addAttribute("", "id", "id", "CDATA", id);
                    xhtml.startElement("div", attributes);
                    xhtml.endElement("div");
                }
            }
        }

        if (runText.equals("\u0013")) {
            // The helper returns how many extra runs it consumed; skip past them
            j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml);
        } else if (runText.startsWith("\u0008")) {
            // Floating Picture(s)
            for (int pn = 0; pn < runText.length(); pn++) {
                // Assume they're in the order from the unclaimed list...
                Picture picture = pictures.nextUnclaimed();

                // Output
                handlePictureCharacterRun(cr, picture, pictures, xhtml);
            }
        } else if (pictureTable.hasPicture(cr)) {
            // Inline Picture
            Picture picture = pictures.getFor(cr);
            handlePictureCharacterRun(cr, picture, pictures, xhtml);
        } else {
            handleCharacterRun(cr, tas.isHeading(), xhtml);
        }
    }

    // Close any still open style tags
    if (curStrikeThrough) {
        xhtml.endElement("s");
        curStrikeThrough = false;
    }
    if (curItalic) {
        xhtml.endElement("i");
        curItalic = false;
    }
    if (curBold) {
        xhtml.endElement("b");
        curBold = false;
    }

    xhtml.endElement(tas.getTag());

    return 0;
}