List of usage examples for org.apache.poi.xwpf.usermodel.XWPFParagraph#getPartType()
@Override
public BodyType getPartType()
From source file:fr.opensagres.poi.xwpf.converter.core.styles.paragraph.AbstractIndentationParagraphValueProvider.java
License:Open Source License
@Override protected StringBuilder getKeyBuffer(XWPFParagraph element, XWPFStylesDocument stylesDocument, String styleId, Enum type) {//from w ww. j a va2s . com if (element.getPartType() == BodyType.TABLECELL) { return super.getKeyBuffer(element, stylesDocument, styleId, type).append("_cell"); } return super.getKeyBuffer(element, stylesDocument, styleId, type); }
From source file:fr.opensagres.poi.xwpf.converter.core.styles.paragraph.ParagraphSpacingAfterValueProvider.java
License:Open Source License
/**
 * Returns the default value for the given paragraph.
 *
 * <p>Paragraphs whose part type is {@link BodyType#TABLECELL} have no
 * default ({@code null}); every other paragraph uses the parent
 * implementation's default.
 *
 * @param paragraph      the paragraph being resolved
 * @param stylesDocument the styles document passed through to the parent implementation
 * @return {@code null} for table-cell paragraphs, otherwise the parent's default value
 */
@Override
protected Float getDefaultValue(XWPFParagraph paragraph, XWPFStylesDocument stylesDocument) {
    if (paragraph.getPartType() != BodyType.TABLECELL) {
        return super.getDefaultValue(paragraph, stylesDocument);
    }
    return null;
}
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.java
License:Apache License
private void extractParagraph(XWPFParagraph paragraph, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { // If this paragraph is actually a whole new section, then // it could have its own headers and footers // Check and handle if so XWPFHeaderFooterPolicy headerFooterPolicy = null; if (paragraph.getCTP().getPPr() != null) { CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr(); if (ctSectPr != null) { headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr); extractHeaders(xhtml, headerFooterPolicy); }/*ww w. j a v a 2s . c om*/ } // Is this a paragraph, or a heading? String tag = "p"; String styleClass = null; if (paragraph.getStyleID() != null) { XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(style.getName(), paragraph.getPartType() == BodyType.TABLECELL); tag = tas.getTag(); styleClass = tas.getStyleClass(); } } if (styleClass == null) { xhtml.startElement(tag); } else { xhtml.startElement(tag, "class", styleClass); } // Output placeholder for any embedded docs: // TODO: replace w/ XPath/XQuery: for (XWPFRun run : paragraph.getRuns()) { XmlCursor c = run.getCTR().newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTObject) { XmlCursor c2 = o.newCursor(); c2.selectPath("./*"); while (c2.toNextSelection()) { XmlObject o2 = c2.getObject(); XmlObject embedAtt = o2.selectAttribute(new QName("Type")); if (embedAtt != null && embedAtt.getDomNode().getNodeValue().equals("Embed")) { // Type is "Embed" XmlObject relIDAtt = o2.selectAttribute(new QName( "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id")); if (relIDAtt != null) { String relID = relIDAtt.getDomNode().getNodeValue(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", 
"id", "CDATA", relID); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } } c2.dispose(); } } c.dispose(); } // Attach bookmarks for the paragraph // (In future, we might put them in the right place, for now // we just put them in the correct paragraph) for (CTBookmark bookmark : paragraph.getCTP().getBookmarkStartList()) { xhtml.startElement("a", "name", bookmark.getName()); xhtml.endElement("a"); } TmpFormatting fmtg = new TmpFormatting(false, false); // Do the iruns for (IRunElement run : paragraph.getIRuns()) { if (run instanceof XWPFSDT) { fmtg = closeStyleTags(xhtml, fmtg); processSDTRun((XWPFSDT) run, xhtml); // for now, we're ignoring formatting in sdt // if you hit an sdt reset to false fmtg.setBold(false); fmtg.setItalic(false); } else { fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg); } } closeStyleTags(xhtml, fmtg); // Now do any comments for the paragraph XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null); String commentText = comments.getCommentText(); if (commentText != null && commentText.length() > 0) { xhtml.characters(commentText); } String footnameText = paragraph.getFootnoteText(); if (footnameText != null && footnameText.length() > 0) { xhtml.characters(footnameText + "\n"); } // Also extract any paragraphs embedded in text boxes: for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath( "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) { extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), xhtml); } // Finish this paragraph xhtml.endElement(tag); if (headerFooterPolicy != null) { extractFooters(xhtml, headerFooterPolicy); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.java
License:Apache License
private void extractParagraph(XWPFParagraph paragraph, XWPFListManager listManager, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { // If this paragraph is actually a whole new section, then // it could have its own headers and footers // Check and handle if so XWPFHeaderFooterPolicy headerFooterPolicy = null; if (paragraph.getCTP().getPPr() != null) { CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr(); if (ctSectPr != null) { headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr); extractHeaders(xhtml, headerFooterPolicy, listManager); }//from w w w. ja v a 2s .c om } // Is this a paragraph, or a heading? String tag = "p"; String styleClass = null; if (paragraph.getStyleID() != null) { XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(style.getName(), paragraph.getPartType() == BodyType.TABLECELL); tag = tas.getTag(); styleClass = tas.getStyleClass(); } } if (styleClass == null) { xhtml.startElement(tag); } else { xhtml.startElement(tag, "class", styleClass); } writeParagraphNumber(paragraph, listManager, xhtml); // Output placeholder for any embedded docs: // TODO: replace w/ XPath/XQuery: for (XWPFRun run : paragraph.getRuns()) { XmlCursor c = run.getCTR().newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTObject) { XmlCursor c2 = o.newCursor(); c2.selectPath("./*"); while (c2.toNextSelection()) { XmlObject o2 = c2.getObject(); XmlObject embedAtt = o2.selectAttribute(new QName("Type")); if (embedAtt != null && embedAtt.getDomNode().getNodeValue().equals("Embed")) { // Type is "Embed" XmlObject relIDAtt = o2.selectAttribute(new QName( "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id")); if (relIDAtt != null) { String relID = relIDAtt.getDomNode().getNodeValue(); AttributesImpl attributes = new AttributesImpl(); 
attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", relID); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } } c2.dispose(); } } c.dispose(); } // Attach bookmarks for the paragraph // (In future, we might put them in the right place, for now // we just put them in the correct paragraph) for (int i = 0; i < paragraph.getCTP().sizeOfBookmarkStartArray(); i++) { CTBookmark bookmark = paragraph.getCTP().getBookmarkStartArray(i); xhtml.startElement("a", "name", bookmark.getName()); xhtml.endElement("a"); } TmpFormatting fmtg = new TmpFormatting(false, false); // Do the iruns for (IRunElement run : paragraph.getIRuns()) { if (run instanceof XWPFSDT) { fmtg = closeStyleTags(xhtml, fmtg); processSDTRun((XWPFSDT) run, xhtml); //for now, we're ignoring formatting in sdt //if you hit an sdt reset to false fmtg.setBold(false); fmtg.setItalic(false); } else { fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg); } } closeStyleTags(xhtml, fmtg); // Now do any comments for the paragraph XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null); String commentText = comments.getCommentText(); if (commentText != null && commentText.length() > 0) { xhtml.characters(commentText); } String footnameText = paragraph.getFootnoteText(); if (footnameText != null && footnameText.length() > 0) { xhtml.characters(footnameText + "\n"); } // Also extract any paragraphs embedded in text boxes: for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath( "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) { extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml); } // Finish this paragraph xhtml.endElement(tag); if (headerFooterPolicy != null) { 
extractFooters(xhtml, headerFooterPolicy, listManager); } }