List of usage examples for org.apache.poi.hwpf.usermodel Paragraph isInList
public boolean isInList()
From source file:org.apache.tika.parser.microsoft.ListManager.java
License:Apache License
/** * Get the formatted number for a given paragraph * <p/>//from ww w.j a va 2 s . c o m * <p><em>Note:</em> This only works correctly if called subsequently for <em>all</em> paragraphs in a valid selection (main document, text field, ...) which are part of a list.</p> * * @param paragraph list paragraph to process * @return String which represents the numbering of this list paragraph; never {@code null}, can be empty string, though, * if something goes wrong in getList() * @throws IllegalArgumentException If the given paragraph is {@code null} or is not part of a list */ public String getFormattedNumber(final Paragraph paragraph) { if (paragraph == null) throw new IllegalArgumentException("Given paragraph cannot be null."); if (!paragraph.isInList()) throw new IllegalArgumentException("Can only process list paragraphs."); //lsid is equivalent to docx's abnum //ilfo is equivalent to docx's num int currAbNumId = -1; try { currAbNumId = paragraph.getList().getLsid(); } catch (NoSuchElementException e) { //somewhat frequent exception when initializing HWPFList return ""; } catch (IllegalArgumentException e) { return ""; } catch (NullPointerException e) { return ""; } int currNumId = paragraph.getIlfo(); ParagraphLevelCounter lc = listLevelMap.get(currAbNumId); LevelTuple[] overrideTuples = overrideTupleMap.get(currNumId); if (lc == null) { ListData listData = listTables.getListData(paragraph.getList().getLsid()); LevelTuple[] levelTuples = new LevelTuple[listData.getLevels().length]; for (int i = 0; i < listData.getLevels().length; i++) { levelTuples[i] = buildTuple(i, listData.getLevels()[i]); } lc = new ParagraphLevelCounter(levelTuples); } if (overrideTuples == null) { overrideTuples = buildOverrideTuples(paragraph, lc.getNumberOfLevels()); } String formattedString = lc.incrementLevel(paragraph.getIlvl(), overrideTuples); listLevelMap.put(currAbNumId, lc); overrideTupleMap.put(currNumId, overrideTuples); return formattedString; }
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, ListManager listManager, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException { // Note - a poi bug means we can't currently properly recurse // into nested tables, so currently we don't if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) { Table t = r.getTable(p);//w ww. j a v a 2s . c om xhtml.startElement("table"); xhtml.startElement("tbody"); for (int rn = 0; rn < t.numRows(); rn++) { TableRow row = t.getRow(rn); xhtml.startElement("tr"); for (int cn = 0; cn < row.numCells(); cn++) { TableCell cell = row.getCell(cn); xhtml.startElement("td"); for (int pn = 0; pn < cell.numParagraphs(); pn++) { Paragraph cellP = cell.getParagraph(pn); handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, listManager, xhtml); } xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); return (t.numParagraphs() - 1); } String text = p.text(); if (text.replaceAll("[\\r\\n\\s]+", "").isEmpty()) { // Skip empty paragraphs return 0; } TagAndStyle tas; String numbering = null; if (document.getStyleSheet().numStyles() > p.getStyleIndex()) { StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex()); if (style != null && style.getName() != null && style.getName().length() > 0) { if (p.isInList()) { numbering = listManager.getFormattedNumber(p); } tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0)); } else { tas = new TagAndStyle("p", null); } } else { tas = new TagAndStyle("p", null); } if (tas.getStyleClass() != null) { xhtml.startElement(tas.getTag(), "class", tas.getStyleClass()); } else { xhtml.startElement(tas.getTag()); } if (numbering != null) { xhtml.characters(numbering); } for (int j = 0; j < p.numCharacterRuns(); j++) { CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: if (cr.text().getBytes(UTF_8)[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link if (field != null && (field.getType() == 58 || field.getType() == 56)) { // Embedded Object: add a <div // class="embedded" id="_X"/> so consumer can see where // in the main text each embedded document // occurred: String id = "_" + field.getMarkSeparatorCharacterRun(r).getPicOffset(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", id); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } if (cr.text().equals("\u0013")) { j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml); } else if (cr.text().startsWith("\u0008")) { // Floating Picture(s) for (int pn = 0; pn < cr.text().length(); pn++) { // Assume they're in the order from the unclaimed list... Picture picture = pictures.nextUnclaimed(); // Output handlePictureCharacterRun(cr, picture, pictures, xhtml); } } else if (pictureTable.hasPicture(cr)) { // Inline Picture Picture picture = pictures.getFor(cr); handlePictureCharacterRun(cr, picture, pictures, xhtml); } else { handleCharacterRun(cr, tas.isHeading(), xhtml); } } // Close any still open style tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (curBold) { xhtml.endElement("b"); curBold = false; } xhtml.endElement(tas.getTag()); return 0; }