List of usage examples for the org.jsoup.nodes.Element method replaceWith
public void replaceWith(Node in)
From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java
/**
 * Flattens inline emphasis markup under {@code topNode}: every {@code <strong>}, {@code <b>}
 * and {@code <i>} element is swapped for a plain text node holding that element's text, so
 * only the wrapped text remains in the output.
 */
private void replaceTagsWithText() {
    // Tags are handled in this order; each lookup runs after the previous tag was flattened.
    final String[] inlineTags = {"strong", "b", "i"};
    for (String tagName : inlineTags) {
        Elements matches = topNode.getElementsByTag(tagName);
        for (Element match : matches) {
            match.replaceWith(new TextNode(match.text(), topNode.baseUri()));
        }
    }
}
From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java
/** * cleans up and converts any nodes that should be considered text into text */// w ww. jav a2s . c om private void convertLinksToText() { if (logger.isDebugEnabled()) { logger.debug("Turning links to text"); } Elements links = topNode.getElementsByTag("a"); for (Element item : links) { if (item.getElementsByTag("img").size() == 0) { TextNode tn = new TextNode(item.text(), topNode.baseUri()); item.replaceWith(tn); } } }
From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java
/** * Removes the HTML5 figure tag and saves the figcaption in the <img> tag's "alt" attribute for later use * @param htmlContent//from ww w .j av a 2 s . c o m * @return */ private Document removeFigureSaveFigcaption(String htmlContent) { Document doc = Jsoup.parseBodyFragment(htmlContent); //figure is a HTML5 tag not accepted by Tidy, so it should be replaced by the content <img>-tag, and the figcaption is saved in the "alt" attribute Elements figureElements = doc.select("figure"); Element figcaptionNode = null; if (figureElements != null) { for (Iterator<Element> iterator = figureElements.iterator(); iterator.hasNext();) { Element figureElement = iterator.next(); Elements figureChildren = figureElement.getAllElements(); Node imageNode = null; if (figureChildren != null) { for (Element figureChild : figureChildren) { if ("img".equals(figureChild.nodeName())) { imageNode = figureChild; } else { if ("figcaption".equals(figureChild.nodeName())) { figcaptionNode = figureChild; //set "figcaption" text as value for "alt" attribute if (imageNode != null) { imageNode.attr("alt", figcaptionNode.text()); } } } } } if (imageNode != null) { figureElement.replaceWith(imageNode); } } } return doc; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
public final static void applyMessages(Element target) { Context context = Context.getCurrentThreadContext(); List<Element> msgElems = target.select(ExtNodeConstants.MSG_NODE_TAG_SELECTOR); for (final Element msgElem : msgElems) { Attributes attributes = msgElem.attributes(); String key = attributes.get(ExtNodeConstants.MSG_NODE_ATTR_KEY); // List<String> externalizeParamKeys = getExternalizeParamKeys(attributes); Object defaultMsg = new Object() { @Override/*from w w w .j a v a2 s . c o m*/ public String toString() { return ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX + msgElem.html(); } }; Locale locale = LocalizeUtil.getLocale(attributes.get(ExtNodeConstants.MSG_NODE_ATTR_LOCALE)); String currentTemplatePath = attributes.get(ExtNodeConstants.ATTR_TEMPLATE_PATH); if (StringUtils.isEmpty(currentTemplatePath)) { logger.warn("There is a msg tag which does not hold corresponding template file path:{}", msgElem.outerHtml()); } else { context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath); } final Map<String, Object> paramMap = getMessageParams(attributes, locale, key); String text; switch (I18nMessageHelperTypeAssistant.configuredHelperType()) { case Mapped: text = I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessageWithDefault(locale, key, defaultMsg, paramMap); break; case Ordered: default: // convert map to array List<Object> numberedParamNameList = new ArrayList<>(); for (int index = 0; paramMap .containsKey(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + index); index++) { numberedParamNameList.add(paramMap.get(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + index)); } text = I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessageWithDefault(locale, key, defaultMsg, numberedParamNameList.toArray()); } Node node; if (text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX)) { node = ElementUtil.text(text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX.length())); } else if 
(text.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX)) { node = ElementUtil .parseAsSingle(text.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX.length())); } else { node = ElementUtil.text(text); } msgElem.replaceWith(node); context.setData(TRACE_VAR_TEMPLATE_PATH, null); } }
From source file:org.niord.core.publication.PublicationUtils.java
/** * Updates the message publications from the publication, parameters and link * * @param message the message//w w w . j a v a2 s .c om * @param publication the publication to extract * @param parameters the optional parameters * @param link the optional link * @param lang either a specific language or null for all languages * @return the message publication or null if not found */ public static MessageVo updateMessagePublications(MessageVo message, SystemPublicationVo publication, String parameters, String link, String lang) { // Sanity check if (message == null || publication == null) { return null; } boolean internal = publication.getMessagePublication() == MessagePublication.INTERNAL; message.getDescs().stream().filter(msgDesc -> lang == null || lang.equals(msgDesc.getLang())) .forEach(msgDesc -> { String updatedPubHtml = computeMessagePublication(publication, parameters, link, msgDesc.getLang()); String pubHtml = internal ? msgDesc.getInternalPublication() : msgDesc.getPublication(); pubHtml = StringUtils.defaultIfBlank(pubHtml, ""); Document doc = Jsoup.parseBodyFragment(pubHtml); String pubAttr = "[publication=" + publication.getPublicationId() + "]"; Element e = doc.select("a" + pubAttr + ",span" + pubAttr).first(); if (e != null) { // TODO: Is there a better way to replace an element? e.replaceWith(Jsoup.parse(updatedPubHtml).body().child(0)); pubHtml = doc.body().html(); } else { pubHtml += " " + updatedPubHtml; } // Lastly, clean up html for artifacts often added by TinyMCE if (StringUtils.isNotBlank(pubHtml)) { pubHtml = pubHtml.replace("<p>", "").replace("</p>", "").trim(); if (internal) { msgDesc.setInternalPublication(pubHtml); } else { msgDesc.setPublication(pubHtml); } } }); return message; }
From source file:us.colloquy.sandbox.TestExtractor.java
@Test public void useJsoup() { String homeDir = System.getProperty("user.home"); System.out.println(homeDir);/* w w w . j a v a 2s . c o m*/ //JSOUP API allows to extract all elements of letters in files // File input = new File("samples/OEBPS/Text/0001_1006_2001.xhtml"); File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html"); try { Document doc = Jsoup.parse(input, "UTF-8"); List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields String previousYear = ""; for (Element element : doc.getElementsByClass("section")) { Letter letter = new Letter(); StringBuilder content = new StringBuilder(); for (Element child : element.children()) { for (Attribute att : child.attributes()) { System.out.println(att.getKey() + " " + att.getValue()); } if ("center".equalsIgnoreCase(child.className())) { String toWhom = child.getElementsByTag("strong").text(); if (StringUtils.isEmpty(toWhom)) { toWhom = child.text(); // System.out.println(toWhom); } String[] toWhomArray = toWhom.split("(\\s\\s)|(,)"); for (String to : toWhomArray) { RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content } //check if there is anything else here and find date and place - it will be replaced if exists below String entireText = child.text(); String tail = entireText.replace(toWhom, ""); if (StringUtils.isNotEmpty(tail)) { RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present } // System.out.println("two whom\t " + child.getElementsByTag("strong").text() ); } else if ("Data".equalsIgnoreCase(child.className())) { if (child.getElementsByTag("em") != null && StringUtils.isNotEmpty(child.getElementsByTag("em").text())) { RussianDate.parseDateAndPlace(letter, child.getElementsByTag("em").text(), previousYear); //most often date and place are enclosed in em tag if (letter.getDate() != null) { LocalDate localDate = 
letter.getDate().toInstant().atZone(ZoneId.systemDefault()) .toLocalDate(); int year = localDate.getYear(); previousYear = year + ""; } } // System.out.println("when and where\t " + child.getElementsByTag("em").text()); } else if ("petit".equalsIgnoreCase(child.className()) || "Textpetit_otstup".equalsIgnoreCase(child.className())) { letter.getNotes().add(child.text()); } else { //System.out.println(child.text() ); Elements elements = child.getElementsByTag("sup"); for (Element e : elements) { String value = e.text(); e.replaceWith(new TextNode("[" + value + "]", null)); } for (Element el : child.getAllElements()) { // System.out.println(el.tagName()); if ("sup".equalsIgnoreCase(el.tagName())) { content.append(" [" + el.text() + "] "); } else { content.append(el.text()); } } content.append("\n"); } // System.out.println(child.tag() + "\n" ); // System.out.println(child.outerHtml() + "\n" + child.text()); } letter.setContent(content.toString()); letters.add(letter); } ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter(); for (Letter letter : letters) { // if (letter.getDate() == null) // { // if (StringUtils.isNotEmpty(person.getLastName())) // { String json = ow.writeValueAsString(letter); System.out.println(json); // } //} } } catch (IOException e) { e.printStackTrace(); } }
From source file:us.colloquy.util.DiaryParser.java
/**
 * Replaces each element returned by {@code child.getElementsByTag("sup")} with a text node
 * containing that element's text wrapped in square brackets.
 *
 * @param child the element whose {@code <sup>} tags are converted to bracketed text
 */
private static void replaceSupTag(Element child) {
    for (Element sup : child.getElementsByTag("sup")) {
        String bracketed = "[" + sup.text() + "]";
        sup.replaceWith(new TextNode(bracketed, null));
    }
}