List of usage examples for org.apache.poi.hwpf.usermodel.CharacterRun#isBold()
public boolean isBold()
From source file:com.example.minireader.WordViewActivity.java
License:Apache License
/**html*/ public void writeParagraphContent(Paragraph paragraph) { Paragraph p = paragraph;//from w ww .java2 s. c om int pnumCharacterRuns = p.numCharacterRuns(); for (int j = 0; j < pnumCharacterRuns; j++) { CharacterRun run = p.getCharacterRun(j); if (run.getPicOffset() == 0 || run.getPicOffset() >= 1000) { if (presentPicture < pictures.size()) { // writePicture(); } } else { try { String text = run.text(); if (text.length() >= 2 && pnumCharacterRuns < 2) { // output.write(text.getBytes()); } else { // int size = run.getFontSize(); int color = run.getColor(); String fontSizeBegin = "<font size=\"" + decideSize(size) + "\">"; String fontColorBegin = "<font color=\"" + decideColor(color) + "\">"; String fontEnd = "</font>"; String boldBegin = "<b>"; String boldEnd = "</b>"; String islaBegin = "<i>"; String islaEnd = "</i>"; output.write(fontSizeBegin.getBytes()); output.write(fontColorBegin.getBytes()); if (run.isBold()) { output.write(boldBegin.getBytes()); } if (run.isItalic()) { output.write(islaBegin.getBytes()); } output.write(text.getBytes()); if (run.isBold()) { output.write(boldEnd.getBytes()); } if (run.isItalic()) { output.write(islaEnd.getBytes()); } output.write(fontEnd.getBytes()); output.write(fontEnd.getBytes()); } } catch (Exception e) { System.out.println("Write File Exception"); } } } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java
License:Apache License
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException { // Skip trailing newlines if (!isRendered(cr) || cr.text().equals("\r")) return;/*from w w w .j ava2 s . com*/ if (!skipStyling) { if (cr.isBold() != curBold) { // Enforce nesting -- must close s and i tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (cr.isBold()) { xhtml.startElement("b"); } else { xhtml.endElement("b"); } curBold = cr.isBold(); } if (cr.isItalic() != curItalic) { // Enforce nesting -- must close s tag if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (cr.isItalic()) { xhtml.startElement("i"); } else { xhtml.endElement("i"); } curItalic = cr.isItalic(); } if (cr.isStrikeThrough() != curStrikeThrough) { if (cr.isStrikeThrough()) { xhtml.startElement("s"); } else { xhtml.endElement("s"); } curStrikeThrough = cr.isStrikeThrough(); } } // Clean up the text String text = cr.text(); text = text.replace('\r', '\n'); if (text.endsWith("\u0007")) { // Strip the table cell end marker text = text.substring(0, text.length() - 1); } // Copied from POI's // org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters: // line tabulator as break line text = text.replace((char) 0x000b, '\n'); // Non-breaking hyphens are returned as char 30 text = text.replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN); // Non-required hyphens to zero-width space text = text.replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE); // TODO: mj xhtml.characters(text); }
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException { // Skip trailing newlines if (!isRendered(cr) || cr.text().equals("\r")) return;//from ww w .java 2s . c o m if (!skipStyling) { if (cr.isBold() != curBold) { // Enforce nesting -- must close s and i tags if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (curItalic) { xhtml.endElement("i"); curItalic = false; } if (cr.isBold()) { xhtml.startElement("b"); } else { xhtml.endElement("b"); } curBold = cr.isBold(); } if (cr.isItalic() != curItalic) { // Enforce nesting -- must close s tag if (curStrikeThrough) { xhtml.endElement("s"); curStrikeThrough = false; } if (cr.isItalic()) { xhtml.startElement("i"); } else { xhtml.endElement("i"); } curItalic = cr.isItalic(); } if (cr.isStrikeThrough() != curStrikeThrough) { if (cr.isStrikeThrough()) { xhtml.startElement("s"); } else { xhtml.endElement("s"); } curStrikeThrough = cr.isStrikeThrough(); } } // Clean up the text String text = cr.text(); text = text.replace('\r', '\n'); if (text.endsWith("\u0007")) { // Strip the table cell end marker text = text.substring(0, text.length() - 1); } // Copied from POI's org/apache/poi/hwpf/converter/AbstractWordConverter.processCharacters: // Non-breaking hyphens are returned as char 30 text = text.replace((char) 30, UNICODECHAR_NONBREAKING_HYPHEN); // Non-required hyphens to zero-width space text = text.replace((char) 31, UNICODECHAR_ZERO_WIDTH_SPACE); // Control characters as line break text = text.replaceAll("[\u0000-\u001f]", "\n"); xhtml.characters(text); }
From source file:org.docx4j.convert.in.Doc.java
License:Apache License
private static org.docx4j.wml.P handleP(WordprocessingMLPackage wordMLPackage, HWPFDocument doc, Paragraph p, org.apache.poi.hwpf.model.StyleSheet stylesheet, MainDocumentPart documentPart, org.docx4j.wml.ObjectFactory factory) { org.docx4j.wml.P wmlP = null;//from ww w. j av a 2 s. c o m if (p.getStyleIndex() > 0) { log.debug("Styled paragraph, with index: " + p.getStyleIndex()); String styleName = stylesheet.getStyleDescription(p.getStyleIndex()).getName(); log.debug(styleName); wmlP = documentPart.createStyledParagraphOfText(stripSpace(styleName), null); } else { wmlP = documentPart.createParagraphOfText(null); } // LineSpacingDescriptor lsd = p.getLineSpacing(); // if (lsd==null || lsd.isEmpty()) { // // do nothing // } else { // PPr pPr = wmlP.getPPr(); // if (pPr==null) { // pPr = Context.getWmlObjectFactory().createPPr(); // wmlP.setPPr(pPr); // } // Spacing spacing = // Context.getWmlObjectFactory().createPPrBaseSpacing(); // spacing.setLine(lsd._dyaLine); // not visible // spacing.setLineRule(STLineSpacingRule.AUTO); // pPr.setSpacing(spacing); // } for (int z = 0; z < p.numCharacterRuns(); z++) { // character run CharacterRun run = p.getCharacterRun(z); // No character styles defined in there?? 
org.docx4j.wml.RPr rPr = null; if (run.isBold()) { // TODO - HIGH PRIORITY- handle other run properties // esp underline, font size if (rPr == null) { rPr = factory.createRPr(); } org.docx4j.wml.BooleanDefaultTrue boldOn = factory.createBooleanDefaultTrue(); boldOn.setVal(Boolean.TRUE); rPr.setB(boldOn); } //Process image if (doc instanceof HWPFDocument && ((HWPFDocument) doc).getPicturesTable().hasPicture(run)) { Picture picture = doc.getPicturesTable().extractPicture(run, true); Inline inline; try { BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, picture.getContent()); long cx = UnitsOfMeasurement .twipToEMU(Math.round((double) imagePart.getImageInfo().getSize().getWidthMpt() * ((double) picture.getHorizontalScalingFactor() * 0.00001d))) * 2L; long cy = UnitsOfMeasurement .twipToEMU(Math.round((double) imagePart.getImageInfo().getSize().getHeightMpt() * ((double) picture.getVerticalScalingFactor() * 0.00001d))) * 2L; inline = imagePart.createImageInline(null, "", ID1++, ID2++, cx, cy, false); org.docx4j.wml.R imgrun = factory.createR(); org.docx4j.wml.Drawing drawing = factory.createDrawing(); imgrun.getContent().add(drawing); drawing.getAnchorOrInline().add(inline); wmlP.getContent().add(imgrun); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } else { // character run text String text = run.text(); // show us the text log.debug("Processing: " + text); String cleansed = stripNonValidXMLCharacters(text); // Necessary to avoid org.xml.sax.SAXParseException: An invalid // XML character // (Unicode: 0xb) was found in the element content of the // document. // when trying to open the resulting docx. // ie JAXB happily writes (marshals) it, but doesn't want to // unmarshall. 
if (!text.equals(cleansed)) { log.warn("Cleansed.."); } org.docx4j.wml.Text t = factory.createText(); t.setValue(cleansed); org.docx4j.wml.R wmlRun = factory.createR(); if (rPr != null) { wmlRun.setRPr(rPr); } wmlRun.getRunContent().add(t); wmlP.getParagraphContent().add(wmlRun); } } System.out.println(XmlUtils.marshaltoString(wmlP, true, true)); return wmlP; }