List of usage examples for org.apache.poi.hwpf.usermodel.Paragraph#isInTable()
public boolean isInTable()
From source file:com.example.minireader.WordViewActivity.java
License:Apache License
public void readAndWrite() { try {//from w w w.j av a 2s. c om myFile = new File(htmlPath); output = new FileOutputStream(myFile); String head = "<html><body>"; String tagBegin = "<p>"; String tagEnd = "</p>"; output.write(head.getBytes()); // int numParagraphs = range.numParagraphs(); // for (int i = 0; i < numParagraphs; i++) { Paragraph p = range.getParagraph(i); // if (p.isInTable()) { int temp = i; if (tableIterator.hasNext()) { String tableBegin = "<table style=\"border-collapse:collapse\" border=1 bordercolor=\"black\">"; String tableEnd = "</table>"; String rowBegin = "<tr>"; String rowEnd = "</tr>"; String colBegin = "<td>"; String colEnd = "</td>"; Table table = tableIterator.next(); // output.write(tableBegin.getBytes()); int rows = table.numRows(); // for (int r = 0; r < rows; r++) { output.write(rowBegin.getBytes()); TableRow row = table.getRow(r); int cols = row.numCells(); int rowNumParagraphs = row.numParagraphs(); int colsNumParagraphs = 0; // for (int c = 0; c < cols; c++) { output.write(colBegin.getBytes()); TableCell cell = row.getCell(c); int max = temp + cell.numParagraphs(); colsNumParagraphs = colsNumParagraphs + cell.numParagraphs(); for (int cp = temp; cp < max; cp++) { Paragraph p1 = range.getParagraph(cp); output.write(tagBegin.getBytes()); writeParagraphContent(p1); output.write(tagEnd.getBytes()); temp++; } output.write(colEnd.getBytes()); } int max1 = temp + rowNumParagraphs; for (int m = temp + colsNumParagraphs; m < max1; m++) { Paragraph p2 = range.getParagraph(m); temp++; } output.write(rowEnd.getBytes()); } output.write(tableEnd.getBytes()); } i = temp; } else { output.write(tagBegin.getBytes()); writeParagraphContent(p); output.write(tagEnd.getBytes()); } } String end = "</body></html>"; output.write(end.getBytes()); output.close(); } catch (Exception e) { System.out.println("readAndWrite Exception"); e.printStackTrace(); } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java
License:Apache License
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException { // Note - a poi bug means we can't currently properly recurse // into nested tables, so currently we don't if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) { Table t = r.getTable(p);//from w w w. jav a 2s. c o m xhtml.startElement("table"); xhtml.startElement("tbody"); for (int rn = 0; rn < t.numRows(); rn++) { TableRow row = t.getRow(rn); xhtml.startElement("tr"); for (int cn = 0; cn < row.numCells(); cn++) { TableCell cell = row.getCell(cn); xhtml.startElement("td"); for (int pn = 0; pn < cell.numParagraphs(); pn++) { Paragraph cellP = cell.getParagraph(pn); handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, xhtml); } xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); return (t.numParagraphs() - 1); } TagAndStyle tas; if (document.getStyleSheet().numStyles() > p.getStyleIndex()) { StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex()); if (style != null && style.getName() != null && style.getName().length() > 0) { tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0)); } else { tas = new TagAndStyle("p", null); } } else { tas = new TagAndStyle("p", null); } if (tas.getStyleClass() != null) { xhtml.startElement(tas.getTag(), "class", tas.getStyleClass()); } else { xhtml.startElement(tas.getTag()); } for (int j = 0; j < p.numCharacterRuns(); j++) { CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: if (cr.text().getBytes()[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link if (field != null && (field.getType() == 58 || 
field.getType() == 56)) { // Embedded Object: add a <div // class="embedded" id="_X"/> so consumer can see where // in the main text each embedded document // occurred: String id = "_" + field.getMarkSeparatorCharacterRun(r).getPicOffset(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", id); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } if (cr.text().equals("\u0013")) { j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml); } else if (cr.text().startsWith("\u0008")) { // Floating Picture(s) for (int pn = 0; pn < cr.text().length(); pn++) { // Assume they're in the order from the unclaimed list... Picture picture = pictures.nextUnclaimed(); // Output handlePictureCharacterRun(cr, picture, pictures, xhtml); } } else if (pictureTable.hasPicture(cr)) { // Inline Picture Picture picture = pictures.getFor(cr); handlePictureCharacterRun(cr, picture, pictures, xhtml); } else { handleCharacterRun(cr, tas.isHeading(), xhtml); } } // Close any still open style tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (curBold) { xhtml.endElement("b"); curBold = false; } xhtml.endElement(tas.getTag()); return 0; }
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, ListManager listManager, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException { // Note - a poi bug means we can't currently properly recurse // into nested tables, so currently we don't if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) { Table t = r.getTable(p);//from w ww . j a va 2s .com xhtml.startElement("table"); xhtml.startElement("tbody"); for (int rn = 0; rn < t.numRows(); rn++) { TableRow row = t.getRow(rn); xhtml.startElement("tr"); for (int cn = 0; cn < row.numCells(); cn++) { TableCell cell = row.getCell(cn); xhtml.startElement("td"); for (int pn = 0; pn < cell.numParagraphs(); pn++) { Paragraph cellP = cell.getParagraph(pn); handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, listManager, xhtml); } xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); return (t.numParagraphs() - 1); } String text = p.text(); if (text.replaceAll("[\\r\\n\\s]+", "").isEmpty()) { // Skip empty paragraphs return 0; } TagAndStyle tas; String numbering = null; if (document.getStyleSheet().numStyles() > p.getStyleIndex()) { StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex()); if (style != null && style.getName() != null && style.getName().length() > 0) { if (p.isInList()) { numbering = listManager.getFormattedNumber(p); } tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0)); } else { tas = new TagAndStyle("p", null); } } else { tas = new TagAndStyle("p", null); } if (tas.getStyleClass() != null) { xhtml.startElement(tas.getTag(), "class", tas.getStyleClass()); } else { xhtml.startElement(tas.getTag()); } if (numbering != null) { xhtml.characters(numbering); } for (int j = 0; j < p.numCharacterRuns(); 
j++) { CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: if (cr.text().getBytes(UTF_8)[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link if (field != null && (field.getType() == 58 || field.getType() == 56)) { // Embedded Object: add a <div // class="embedded" id="_X"/> so consumer can see where // in the main text each embedded document // occurred: String id = "_" + field.getMarkSeparatorCharacterRun(r).getPicOffset(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", id); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } if (cr.text().equals("\u0013")) { j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml); } else if (cr.text().startsWith("\u0008")) { // Floating Picture(s) for (int pn = 0; pn < cr.text().length(); pn++) { // Assume they're in the order from the unclaimed list... Picture picture = pictures.nextUnclaimed(); // Output handlePictureCharacterRun(cr, picture, pictures, xhtml); } } else if (pictureTable.hasPicture(cr)) { // Inline Picture Picture picture = pictures.getFor(cr); handlePictureCharacterRun(cr, picture, pictures, xhtml); } else { handleCharacterRun(cr, tas.isHeading(), xhtml); } } // Close any still open style tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (curBold) { xhtml.endElement("b"); curBold = false; } xhtml.endElement(tas.getTag()); return 0; }
From source file:org.docx4j.convert.in.Doc.java
License:Apache License
/** * This method is private, since the fact that conversion is (currently) * performed using POI's HWPF should be encapsulated. * /*from w w w.jav a 2 s . com*/ * @param doc * @param wordMLPackage * @return success or failure */ private static void convert(HWPFDocument doc, WordprocessingMLPackage wordMLPackage) throws Exception { // Convert styles org.apache.poi.hwpf.model.StyleSheet stylesheet = doc.getStyleSheet(); // TODO - higher priority // At present, a default set of styles are defined in the output // document. // Convert lists org.apache.poi.hwpf.model.ListTables listTables = doc.getListTables(); // TODO // Convert document properties org.apache.poi.hwpf.model.DocumentProperties docProps = doc.getDocProperties(); // TODO // Convert main document part MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart(); org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory(); Range r = doc.getRange(); for (int x = 0; x < r.numSections(); x++) { Section s = r.getSection(x); // TODO - convert section for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = s.getParagraph(y); if (p.isInTable()) { Table t = s.getTable(p); int cl = numCol(t); log.info("Found " + t.numRows() + "x" + cl + " table - TODO - convert"); handleTable(wordMLPackage, doc, t, stylesheet, documentPart, factory); // addTODO(factory, wmlP, "[TABLE " + + t.numRows() + "x" + // cl // + " - can't convert tables yet]"); y += t.numParagraphs() - 1; } else { org.docx4j.wml.P paraToAdd = handleP(wordMLPackage, doc, p, stylesheet, documentPart, factory); documentPart.addObject(paraToAdd); } } } }