List of usage examples for org.jsoup.nodes.Element.childNodes()
List<Node> childNodes()
To view the source code for org.jsoup.nodes.Element.childNodes(), click the Source Link.
From source file:org.asqatasun.rules.textbuilder.DeepTextElementBuilder.java
@Override public String buildTextFromElement(Element element) { StringBuilder elementText = new StringBuilder(); if (element.hasAttr(ALT_ATTR)) { elementText.append(SPACER);//from w w w.j a v a 2 s .c om elementText.append(altAttrTextBuilder.buildTextFromElement(element)); } for (Node child : element.childNodes()) { if (child instanceof TextNode && !((TextNode) child).isBlank()) { elementText.append(SPACER); elementText.append(StringUtils.trim(((TextNode) child).text())); } else if (child instanceof Element) { elementText.append(SPACER); elementText.append(buildTextFromElement((Element) child)); } } return StringUtils.trim(elementText.toString()); }
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
private static void enhanceNodes(final List<Node> nodes) { nodes.forEach(node -> {//from w w w. jav a 2s . c o m if (node instanceof Element) { Element element = (Element) node; final Attributes attributes = element.attributes(); enhanceAttributes(attributes); final List<Node> childNodes = element.childNodes(); enhanceNodes(childNodes); return; } if (node instanceof TextNode) { enhanceTextNode(node); } }); }
From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java
/**
 * Extracts the text of an image link.
 *
 * <p>Direct children are visited in order: a text node contributes its text, an element
 * whose node name equals {@code HtmlElementStore.IMG_ELEMENT} (ignoring case) and that has
 * the {@code ALT_ATTR} attribute contributes the trimmed attribute value, and any other
 * element contributes the result of a recursive call. Other node types are ignored.
 *
 * @param element the element to extract the text from
 * @return the concatenated text, trimmed and passed through
 *         {@code StringUtil.normaliseWhitespace}
 */
public static String extractImageLinkText(Element element) {
    final StringBuilder out = new StringBuilder();

    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            out.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        final boolean imageWithAlt =
                StringUtils.equalsIgnoreCase(child.nodeName(), HtmlElementStore.IMG_ELEMENT)
                        && child.hasAttr(ALT_ATTR);
        final String fragment = imageWithAlt
                ? child.attr(ALT_ATTR).trim()
                : extractImageLinkText((Element) child);
        out.append(fragment);
    }

    final String collected = out.toString().trim();
    return StringUtil.normaliseWhitespace(collected);
}
From source file:org.opens.tanaguru.rules.textbuilder.LinkTextElementBuilder.java
/**
 * Builds the text of a link element from its alternative text and its descendants.
 *
 * <p>When the element has the {@code ALT_ATTR} attribute, the text returned by
 * {@code altAttrTextBuilder} is appended first. The direct children are then visited in
 * order: non-blank text nodes add their trimmed text, child elements add the text obtained
 * by calling this method on them. {@code SPACER} is written before each addition.
 *
 * @param element the element whose text is built
 * @return the accumulated text after {@code StringUtils.trim}
 */
@Override
public String buildTextFromElement(Element element) {
    final StringBuilder buffer = new StringBuilder();

    if (element.hasAttr(ALT_ATTR)) {
        buffer.append(SPACER).append(altAttrTextBuilder.buildTextFromElement(element));
    }

    for (Node node : element.childNodes()) {
        if (node instanceof Element) {
            buffer.append(SPACER).append(buildTextFromElement((Element) node));
        } else if (node instanceof TextNode) {
            final TextNode textNode = (TextNode) node;
            if (textNode.isBlank()) {
                // Whitespace-only text contributes nothing, not even a spacer.
                continue;
            }
            buffer.append(SPACER).append(StringUtils.trim(textNode.text()));
        }
    }

    return StringUtils.trim(buffer.toString());
}
From source file:xxx.web.comments.debates.impl.ProConOrgCommentsParser.java
/** * Extracts the document of the quote//from w ww. j a va 2 s . c o m * * @param textElement text quote element * @return plain string with paragraphs kept */ protected static String extractPlainTextFromTextElement(Element textElement) { StringBuilder sb = new StringBuilder(); for (Node childNode : textElement.childNodes()) { if (childNode instanceof Element) { Element childElement = (Element) childNode; String tagName = childElement.tagName(); if ("p".equals(tagName) || "span".equals(tagName)) { sb.append(childElement.text()); sb.append("\n"); } else if ("br".equals(tagName)) { // prevent double newlines sb = new StringBuilder(sb.toString().trim()); sb.append("\n"); } } else if (childNode instanceof TextNode) { TextNode textNode = (TextNode) childNode; sb.append(textNode.text()); } } // remove leading + ending quotes return Utils.normalize(sb.toString()).replaceAll("[(^\")(\"$)]", ""); }
From source file:xxx.web.comments.debates.impl.ProConOrgParser.java
/** * Extracts the document of the quote//from w ww. j av a 2 s . c o m * * @param textElement text quote element * @return plain string with paragraphs kept */ public static String extractPlainTextFromTextElement(Element textElement) { StringBuilder sb = new StringBuilder(); for (Node childNode : textElement.childNodes()) { if (childNode instanceof Element) { Element childElement = (Element) childNode; String tagName = childElement.tagName(); if ("p".equals(tagName) || "span".equals(tagName)) { sb.append(childElement.text()); sb.append("\n"); } else if ("br".equals(tagName)) { // prevent double newlines sb = new StringBuilder(sb.toString().trim()); sb.append("\n"); } } else if (childNode instanceof TextNode) { TextNode textNode = (TextNode) childNode; sb.append(textNode.text()); } } // remove leading + ending quotes return Utils.normalize(sb.toString()).replaceAll("[(^\")(\"$)]", ""); }