Usage examples for org.apache.poi.hwpf.usermodel.TableCell#getParagraph(int)
public Paragraph getParagraph(int index)
From source file:com.icebreak.p2p.front.controller.trade.download.WordParse.java
@Transactional(rollbackFor = Exception.class, value = "transactionManager") public void readwriteWord(HttpServletResponse response, HttpSession session, String _file, Map<String, String> map, List<Map<String, Text>> lst, LoanDemandDO loan, String downType) { //?word?/*ww w .j a va 2 s . c o m*/ FileInputStream in; HWPFDocument hdt = null; String filePath = _file; ServletContext application = session.getServletContext(); String serverRealPath = application.getRealPath("/"); String fileTemp = AppConstantsUtil.getYrdUploadFolder() + File.separator + "doc"; File fileDir = new File(fileTemp); if (!fileDir.exists()) { fileDir.mkdir(); } try { in = new FileInputStream(new File(serverRealPath + filePath)); hdt = new HWPFDocument(in); } catch (Exception e1) { logger.error("??", e1); } //??word? Range range = hdt.getRange(); TableIterator it = new TableIterator(range); Table tb = null; while (it.hasNext()) { tb = it.next(); break; } if (lst.size() > 0) { for (int i = 1; i <= lst.size(); i++) { Map<String, Text> replaces = lst.get(i - 1); TableRow tr = tb.getRow(i); // 0 for (int j = 0; j < tr.numCells(); j++) { TableCell td = tr.getCell(j);// ?? // ?? for (int k = 0; k < td.numParagraphs(); k++) { Paragraph para = td.getParagraph(k); String s = para.text(); final String old = s; for (String key : replaces.keySet()) { if (s.contains(key)) { s = s.replace(key, replaces.get(key).getText()); } } if (!old.equals(s)) {// ? 
para.replaceText(old, s); s = para.text(); } } // end for } } for (int n = lst.size() + 1; n < tb.numRows(); n++) { TableRow tr = tb.getRow(n); tr.delete(); } } for (Map.Entry<String, String> entry : map.entrySet()) { range.replaceText(entry.getKey(), entry.getValue()); } //String fileName = f[f.length-1]; String fileName = System.currentTimeMillis() + _file.substring(_file.lastIndexOf("."), _file.length()); ByteArrayOutputStream ostream = new ByteArrayOutputStream(); try { FileOutputStream out = new FileOutputStream(fileTemp + fileName);//?word hdt.write(ostream); out.write(ostream.toByteArray()); out.flush(); out.close(); } catch (Exception e) { logger.error("?word", e); } Doc2Pdf doc2pdf = new Doc2Pdf(); String pdfAddress = doc2pdf.createPDF(fileTemp + fileName);//wordpdf try { String fileType = ""; if (lst.size() > 0) {//?? fileType = "contract"; } else {//? fileType = "letter"; } DownloadAndPrivewFileTread downThread = new DownloadAndPrivewFileTread(); //this.downloadAndPreviewFile(response, loan.getLoanName(), pdfAddress, downType, fileType);// downThread.setDownType(downType); downThread.setFilePath(pdfAddress); downThread.setResponse(response); downThread.setFileType(fileType); downThread.setProName(loan.getLoanName()); downThread.run(); File pdfFile = new File(pdfAddress); pdfFile.delete(); } catch (Exception e) { logger.error("pdf", e); } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java
License:Apache License
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException { // Note - a poi bug means we can't currently properly recurse // into nested tables, so currently we don't if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) { Table t = r.getTable(p);/*from w w w . j a v a 2s.co m*/ xhtml.startElement("table"); xhtml.startElement("tbody"); for (int rn = 0; rn < t.numRows(); rn++) { TableRow row = t.getRow(rn); xhtml.startElement("tr"); for (int cn = 0; cn < row.numCells(); cn++) { TableCell cell = row.getCell(cn); xhtml.startElement("td"); for (int pn = 0; pn < cell.numParagraphs(); pn++) { Paragraph cellP = cell.getParagraph(pn); handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, xhtml); } xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); return (t.numParagraphs() - 1); } TagAndStyle tas; if (document.getStyleSheet().numStyles() > p.getStyleIndex()) { StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex()); if (style != null && style.getName() != null && style.getName().length() > 0) { tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0)); } else { tas = new TagAndStyle("p", null); } } else { tas = new TagAndStyle("p", null); } if (tas.getStyleClass() != null) { xhtml.startElement(tas.getTag(), "class", tas.getStyleClass()); } else { xhtml.startElement(tas.getTag()); } for (int j = 0; j < p.numCharacterRuns(); j++) { CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: if (cr.text().getBytes()[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link if (field != null && (field.getType() == 58 || 
field.getType() == 56)) { // Embedded Object: add a <div // class="embedded" id="_X"/> so consumer can see where // in the main text each embedded document // occurred: String id = "_" + field.getMarkSeparatorCharacterRun(r).getPicOffset(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", id); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } if (cr.text().equals("\u0013")) { j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml); } else if (cr.text().startsWith("\u0008")) { // Floating Picture(s) for (int pn = 0; pn < cr.text().length(); pn++) { // Assume they're in the order from the unclaimed list... Picture picture = pictures.nextUnclaimed(); // Output handlePictureCharacterRun(cr, picture, pictures, xhtml); } } else if (pictureTable.hasPicture(cr)) { // Inline Picture Picture picture = pictures.getFor(cr); handlePictureCharacterRun(cr, picture, pictures, xhtml); } else { handleCharacterRun(cr, tas.isHeading(), xhtml); } } // Close any still open style tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (curBold) { xhtml.endElement("b"); curBold = false; } xhtml.endElement(tas.getTag()); return 0; }
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, ListManager listManager, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException { // Note - a poi bug means we can't currently properly recurse // into nested tables, so currently we don't if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) { Table t = r.getTable(p);/*from w w w .j a va 2 s . co m*/ xhtml.startElement("table"); xhtml.startElement("tbody"); for (int rn = 0; rn < t.numRows(); rn++) { TableRow row = t.getRow(rn); xhtml.startElement("tr"); for (int cn = 0; cn < row.numCells(); cn++) { TableCell cell = row.getCell(cn); xhtml.startElement("td"); for (int pn = 0; pn < cell.numParagraphs(); pn++) { Paragraph cellP = cell.getParagraph(pn); handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, listManager, xhtml); } xhtml.endElement("td"); } xhtml.endElement("tr"); } xhtml.endElement("tbody"); xhtml.endElement("table"); return (t.numParagraphs() - 1); } String text = p.text(); if (text.replaceAll("[\\r\\n\\s]+", "").isEmpty()) { // Skip empty paragraphs return 0; } TagAndStyle tas; String numbering = null; if (document.getStyleSheet().numStyles() > p.getStyleIndex()) { StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex()); if (style != null && style.getName() != null && style.getName().length() > 0) { if (p.isInList()) { numbering = listManager.getFormattedNumber(p); } tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0)); } else { tas = new TagAndStyle("p", null); } } else { tas = new TagAndStyle("p", null); } if (tas.getStyleClass() != null) { xhtml.startElement(tas.getTag(), "class", tas.getStyleClass()); } else { xhtml.startElement(tas.getTag()); } if (numbering != null) { xhtml.characters(numbering); } for (int j = 0; j < 
p.numCharacterRuns(); j++) { CharacterRun cr = p.getCharacterRun(j); // FIELD_BEGIN_MARK: if (cr.text().getBytes(UTF_8)[0] == 0x13) { Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset()); // 58 is an embedded document // 56 is a document link if (field != null && (field.getType() == 58 || field.getType() == 56)) { // Embedded Object: add a <div // class="embedded" id="_X"/> so consumer can see where // in the main text each embedded document // occurred: String id = "_" + field.getMarkSeparatorCharacterRun(r).getPicOffset(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", id); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } if (cr.text().equals("\u0013")) { j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml); } else if (cr.text().startsWith("\u0008")) { // Floating Picture(s) for (int pn = 0; pn < cr.text().length(); pn++) { // Assume they're in the order from the unclaimed list... Picture picture = pictures.nextUnclaimed(); // Output handlePictureCharacterRun(cr, picture, pictures, xhtml); } } else if (pictureTable.hasPicture(cr)) { // Inline Picture Picture picture = pictures.getFor(cr); handlePictureCharacterRun(cr, picture, pictures, xhtml); } else { handleCharacterRun(cr, tas.isHeading(), xhtml); } } // Close any still open style tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (curBold) { xhtml.endElement("b"); curBold = false; } xhtml.endElement(tas.getTag()); return 0; }
From source file:org.docx4j.convert.in.Doc.java
License:Apache License
private static void handleTable(WordprocessingMLPackage wordMLPackage, HWPFDocument doc, Table t, org.apache.poi.hwpf.model.StyleSheet stylesheet, MainDocumentPart documentPart, org.docx4j.wml.ObjectFactory factory) { org.docx4j.wml.Tbl tbl = factory.createTbl(); documentPart.addObject(tbl);/*from ww w .j av a2s . com*/ org.docx4j.wml.TblPr tblPr = factory.createTblPr(); tbl.setTblPr(tblPr); // TODO - set tblPr values org.docx4j.wml.TblGrid tblGrid = factory.createTblGrid(); tbl.setTblGrid(tblGrid); // TODO - set tblGrid values for (int i = 0; i < t.numRows(); i++) { TableRow tr = t.getRow(i); org.docx4j.wml.Tr trOut = factory.createTr(); tbl.getEGContentRowContent().add(trOut); for (int j = 0; j < tr.numCells(); j++) { TableCell tc = tr.getCell(j); org.docx4j.wml.Tc tcOut = factory.createTc(); trOut.getEGContentCellContent().add(tcOut); // System.out.println("CELL[" + i + "][" + j + "]=" + // tc.text()); for (int y = 0; y < tc.numParagraphs(); y++) { Paragraph p = tc.getParagraph(y); // Nested tables? // if (p.isInTable()) ??? // TOTO fill up parameters org.docx4j.wml.P paraToAdd = handleP(wordMLPackage, doc, p, stylesheet, documentPart, factory); tcOut.getEGBlockLevelElts().add(paraToAdd); log.debug("Added p to tc"); } } } }