Example usage for org.jsoup.nodes Element childNodes

List of usage examples for org.jsoup.nodes Element childNodes

Introduction

On this page you can find example usages of org.jsoup.nodes Element childNodes.

Prototype

List<Node> childNodes()

To view the source code for org.jsoup.nodes Element childNodes, click the following link:

Source Link

Usage

From source file:org.asqatasun.rules.textbuilder.DeepTextElementBuilder.java

/**
 * Builds the text of an element by walking its direct children recursively.
 * <p>
 * The result is made of the following parts, each one preceded by {@code SPACER}:
 * the text produced by {@code altAttrTextBuilder} when the element carries the
 * {@code ALT_ATTR} attribute, the trimmed text of every non-blank text node child,
 * and the text built recursively for every child element.
 *
 * @param element the element whose text is built
 * @return the collected text, trimmed with {@code StringUtils.trim}
 */
@Override
public String buildTextFromElement(Element element) {
    final StringBuilder buffer = new StringBuilder();

    if (element.hasAttr(ALT_ATTR)) {
        buffer.append(SPACER).append(altAttrTextBuilder.buildTextFromElement(element));
    }

    for (Node node : element.childNodes()) {
        if (node instanceof Element) {
            // Nested element: recurse so that its own alt text and children are included.
            buffer.append(SPACER).append(buildTextFromElement((Element) node));
            continue;
        }
        if (node instanceof TextNode) {
            final TextNode textNode = (TextNode) node;
            // Blank text nodes contribute nothing, not even a spacer.
            if (!textNode.isBlank()) {
                buffer.append(SPACER).append(StringUtils.trim(textNode.text()));
            }
        }
    }

    return StringUtils.trim(buffer.toString());
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Recursively walks the given nodes and dispatches each one to the matching helper.
 * <p>
 * For every {@link Element}, its attributes are handed to {@code enhanceAttributes} and its
 * child nodes are processed by this method again. Every {@link TextNode} is handed to
 * {@code enhanceTextNode}. Nodes of any other type are skipped.
 *
 * @param nodes the nodes to process
 */
private static void enhanceNodes(final List<Node> nodes) {

    nodes.forEach(node -> {

        if (node instanceof TextNode) {

            enhanceTextNode(node);
        } else if (node instanceof Element) {

            final Element element = (Element) node;

            // attributes first, then descend into the children
            enhanceAttributes(element.attributes());
            enhanceNodes(element.childNodes());
        }
    });
}

From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java

/**
 * Extracts the text of an image link.
 * <p>
 * The direct children of the element are visited in order: a text node contributes its
 * text, an {@code img} element that has the {@code ALT_ATTR} attribute contributes the
 * trimmed value of that attribute, and any other element is processed recursively.
 *
 * @param element the element whose text is extracted
 * @return the concatenated text, trimmed and passed through
 *         {@code StringUtil.normaliseWhitespace}
 */
public static String extractImageLinkText(Element element) {
    final StringBuilder collected = new StringBuilder();

    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            collected.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            // Neither text nor element (e.g. a comment): nothing to extract.
            continue;
        }

        final boolean imageWithAlt =
                StringUtils.equalsIgnoreCase(child.nodeName(), HtmlElementStore.IMG_ELEMENT)
                && child.hasAttr(ALT_ATTR);

        if (imageWithAlt) {
            collected.append(child.attr(ALT_ATTR).trim());
        } else {
            collected.append(extractImageLinkText((Element) child));
        }
    }

    final String rawText = collected.toString().trim();
    return StringUtil.normaliseWhitespace(rawText);
}

From source file:org.opens.tanaguru.rules.textbuilder.LinkTextElementBuilder.java

/**
 * Builds the text of a link element by walking its direct children recursively.
 * <p>
 * The result is made of the following parts, each one preceded by {@code SPACER}:
 * the text produced by {@code altAttrTextBuilder} when the element carries the
 * {@code ALT_ATTR} attribute, the trimmed text of every non-blank text node child,
 * and the text built recursively for every child element.
 *
 * @param element the element whose link text is built
 * @return the collected text, trimmed with {@code StringUtils.trim}
 */
@Override
public String buildTextFromElement(Element element) {
    final StringBuilder text = new StringBuilder();

    if (element.hasAttr(ALT_ATTR)) {
        text.append(SPACER).append(altAttrTextBuilder.buildTextFromElement(element));
    }

    for (Node node : element.childNodes()) {
        if (node instanceof Element) {
            // Nested element: recurse so that its own alt text and children are included.
            text.append(SPACER).append(buildTextFromElement((Element) node));
            continue;
        }
        if (node instanceof TextNode) {
            final TextNode textNode = (TextNode) node;
            // Blank text nodes contribute nothing, not even a spacer.
            if (!textNode.isBlank()) {
                text.append(SPACER).append(StringUtils.trim(textNode.text()));
            }
        }
    }

    return StringUtils.trim(text.toString());
}

From source file:xxx.web.comments.debates.impl.ProConOrgCommentsParser.java

/**
 * Extracts the plain text of a quote element, keeping paragraph breaks as newlines.
 * <p>
 * Only the direct children are inspected: a {@code p} or {@code span} child contributes
 * its text followed by a newline; a {@code br} child trims the text collected so far and
 * appends a single newline; any other child element is ignored; a text node contributes
 * its text.
 *
 * @param textElement text quote element
 * @return plain string with paragraphs kept, passed through {@code Utils.normalize} and
 *         with one leading and one trailing double quote removed
 */
protected static String extractPlainTextFromTextElement(Element textElement) {
    StringBuilder sb = new StringBuilder();

    for (Node childNode : textElement.childNodes()) {
        if (childNode instanceof Element) {
            Element childElement = (Element) childNode;

            String tagName = childElement.tagName();

            if ("p".equals(tagName) || "span".equals(tagName)) {
                sb.append(childElement.text());
                sb.append("\n");
            } else if ("br".equals(tagName)) {
                // prevent double newlines: drop surrounding whitespace before adding one
                sb = new StringBuilder(sb.toString().trim());
                sb.append("\n");
            }

        } else if (childNode instanceof TextNode) {
            TextNode textNode = (TextNode) childNode;

            sb.append(textNode.text());
        }
    }

    // Remove leading + ending quotes only. This must be an anchored alternation: the
    // former pattern "[(^\")(\"$)]" was a character class and therefore deleted every
    // '(', ')', '^', '$' and '"' anywhere in the text.
    return Utils.normalize(sb.toString()).replaceAll("^\"|\"$", "");
}

From source file:xxx.web.comments.debates.impl.ProConOrgParser.java

/**
 * Extracts the plain text of a quote element, keeping paragraph breaks as newlines.
 * <p>
 * Only the direct children are inspected: a {@code p} or {@code span} child contributes
 * its text followed by a newline; a {@code br} child trims the text collected so far and
 * appends a single newline; any other child element is ignored; a text node contributes
 * its text.
 *
 * @param textElement text quote element
 * @return plain string with paragraphs kept, passed through {@code Utils.normalize} and
 *         with one leading and one trailing double quote removed
 */
public static String extractPlainTextFromTextElement(Element textElement) {
    StringBuilder sb = new StringBuilder();

    for (Node childNode : textElement.childNodes()) {
        if (childNode instanceof Element) {
            Element childElement = (Element) childNode;

            String tagName = childElement.tagName();

            if ("p".equals(tagName) || "span".equals(tagName)) {
                sb.append(childElement.text());
                sb.append("\n");
            } else if ("br".equals(tagName)) {
                // prevent double newlines: drop surrounding whitespace before adding one
                sb = new StringBuilder(sb.toString().trim());
                sb.append("\n");
            }

        } else if (childNode instanceof TextNode) {
            TextNode textNode = (TextNode) childNode;

            sb.append(textNode.text());
        }
    }

    // Remove leading + ending quotes only. This must be an anchored alternation: the
    // former pattern "[(^\")(\"$)]" was a character class and therefore deleted every
    // '(', ')', '^', '$' and '"' anywhere in the text.
    return Utils.normalize(sb.toString()).replaceAll("^\"|\"$", "");
}