Example usage for org.jsoup.nodes Node toString

List of usage examples for org.jsoup.nodes Node toString

Introduction

On this page you can find example usages of the toString() method of org.jsoup.nodes.Node.

Prototype

public String toString() 

Source Link

Document

Gets this node's outer HTML.

Usage

From source file:com.bibisco.manager.TextEditorManager.java

/**
 * Updates the character count and the word list of {@code pHtmlParsingResult} using the
 * string form of {@code pNode}.
 *
 * <p>The node string is processed twice: once to measure its length (after normalising
 * {@code &nbsp;} and newlines and unescaping HTML entities), and once to split it into
 * whitespace-separated words after a series of entity and character replacements.
 *
 * @param pHtmlParsingResult accumulator whose {@code characterCount} is incremented and whose
 *                           {@code words} collection receives the extracted words
 * @param pNode              node whose {@code toString()} output is analysed
 */
private static void parseTextNode(HtmlParsingResult pHtmlParsingResult, Node pNode) {

    List<String> lListWords = new ArrayList<String>();

    mLog.debug("Start parseTextNode(HtmlParsingResult, Node): ", pNode.toString());

    // character count: &nbsp; counts as one space, newlines are dropped, and the remaining
    // entities are unescaped before the length is taken
    String lStrNodeText = StringUtils.replace(pNode.toString(), "&nbsp;", " ");
    lStrNodeText = StringUtils.replace(lStrNodeText, "\n", "");
    lStrNodeText = StringEscapeUtils.unescapeHtml(lStrNodeText);
    pHtmlParsingResult.characterCount += lStrNodeText.length();

    // extract words: start again from the untouched node string
    lStrNodeText = pNode.toString();
    // these entities are removed outright (replaced by the empty string, not by a space)
    lStrNodeText = StringUtils.replace(lStrNodeText, "&nbsp;", "");
    lStrNodeText = StringUtils.replace(lStrNodeText, "&laquo;", "");
    lStrNodeText = StringUtils.replace(lStrNodeText, "&raquo;", "");
    lStrNodeText = StringUtils.replace(lStrNodeText, "&mdash;", "");
    lStrNodeText = StringEscapeUtils.unescapeHtml(lStrNodeText);
    // NOTE(review): replaceCharIntervalWithWhiteSpace is defined elsewhere; presumably the two
    // ints are an inclusive character-code range to blank out. If so, these ranges cover ASCII
    // punctuation except code 39 (apostrophe), plus the range 161-191 -- confirm against the helper.
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 33, 38);
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 40, 47);
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 58, 64);
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 91, 96);
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 123, 126);
    lStrNodeText = replaceCharIntervalWithWhiteSpace(lStrNodeText, 161, 191);
    // NOTE(review): the three char literals below are empty as shown here, which is not valid
    // Java. The original characters were most likely lost in extraction -- restore them from
    // the upstream source before compiling.
    lStrNodeText = StringUtils.replaceChars(lStrNodeText, '', ' ');
    lStrNodeText = StringUtils.replaceChars(lStrNodeText, '', ' ');
    lStrNodeText = StringUtils.replaceChars(lStrNodeText, '', ' ');
    lStrNodeText = lStrNodeText.trim();

    // StringTokenizer with no explicit delimiters splits on its default whitespace set
    if (StringUtils.isNotBlank(lStrNodeText)) {
        StringTokenizer lStringTokenizer = new StringTokenizer(lStrNodeText);
        while (lStringTokenizer.hasMoreTokens()) {
            lListWords.add(lStringTokenizer.nextToken());
        }
    }
    pHtmlParsingResult.words.addAll(lListWords);

    mLog.debug("End parseTextNode(HtmlParsingResult, Node)");
}

From source file:org.coronastreet.gpxconverter.GarminForm.java

/**
 * Searches the subtree below {@code node}, depth first and in document order, for the first
 * comment node and derives the flow execution key from it.
 *
 * <p>The comment's string form is run through the flow-key pattern; when the comment matches,
 * only the captured key is returned. When it does not match, {@code replaceAll} leaves the
 * text unchanged, so the trimmed comment markup itself is returned.
 *
 * <p>NOTE(review): the pattern's capture group is the literal {@code e1s1}, so only that exact
 * key is extracted -- confirm whether a more general key pattern was intended.
 *
 * @param node root of the subtree to search
 * @return the extracted key (or the trimmed text of the first comment found), or {@code null}
 *         when the subtree contains no comment node
 */
private static String findFlowKey(Node node) {
    for (Node child : node.childNodes()) {
        if (child.nodeName().equals("#comment")) {
            String flowKeyPattern = "\\<\\!-- flowExecutionKey\\: \\[(e1s1)\\] --\\>";
            return child.toString().replaceAll(flowKeyPattern, "$1").trim();
        }
        // Propagate a key found deeper in the tree. Previously the recursive result was
        // discarded, so comments nested below the direct children could never be returned.
        String nestedKey = findFlowKey(child);
        if (nestedKey != null) {
            return nestedKey;
        }
    }
    return null;
}

From source file:com.screenslicer.common.CommonUtil.java

/**
 * Sets the document's output charset and rewrites every non-empty text node with the decoded
 * form of its string representation.
 *
 * @param doc   document to process in place
 * @param ascii {@code true} to output as {@code "ascii"}, {@code false} for {@code "utf-8"}
 * @return the same {@code doc} instance
 */
private static Element sanitize(Document doc, final boolean ascii) {
    final String charsetName = ascii ? "ascii" : "utf-8";
    doc.outputSettings().charset(charsetName);

    final NodeVisitor textDecoder = new NodeVisitor() {
        @Override
        public void head(Node visited, int depth) {
            try {
                final boolean isTextNode = visited.nodeName().equals("#text");
                if (isTextNode && !CommonUtil.isEmpty(visited.outerHtml())) {
                    final TextNode textNode = (TextNode) visited;
                    textNode.text(HtmlCoder.decode(visited.toString()));
                }
            } catch (Throwable failure) {
                // best effort: a node that fails to decode is logged and left as it was
                Log.exception(failure);
            }
        }

        @Override
        public void tail(Node visited, int depth) {
            // nothing to do when leaving a node
        }
    };
    doc.traverse(textDecoder);
    return doc;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Decides whether a node should be treated as empty.
 *
 * <p>A node is empty when it is {@code null}, is a comment, data, {@code style} or
 * {@code script} node, is hidden, is filtered (only checked when {@code doFilter} is set), or is
 * a text node whose string form {@code CommonUtil.isEmpty} reports as empty.
 *
 * @param node     node to test; may be {@code null}
 * @param doFilter whether the filter check takes part in the decision
 * @return {@code true} if any of the conditions above holds
 */
public static boolean isEmpty(Node node, boolean doFilter) {
    if (node == null) {
        return true;
    }
    final String name = node.nodeName();
    if (name.equals("#comment") || name.equals("#data") || name.equals("style") || name.equals("script")) {
        return true;
    }
    if (isHidden(node)) {
        return true;
    }
    if (doFilter && isFiltered(node)) {
        return true;
    }
    return name.equals("#text") && CommonUtil.isEmpty(node.toString(), true);
}

From source file:net.poemerchant.scraper.ShopScraper.java

/**
 * Collects one buyout per item by scanning the sibling nodes that follow each
 * {@code #item-fragment-<index>} element.
 *
 * <p>For every item the scan walks forward through the siblings until it reaches the next
 * item's element or runs out of siblings. Element siblings are skipped; the first non-element
 * sibling whose trimmed text splits into exactly three tokens, with a first token that parses
 * to a known buyout mode, becomes the item's buyout. Items with no such sibling, or whose
 * element is missing, get {@code Buyout.NONE}.
 *
 * @param noOfItems number of items to scan, starting at index 0
 * @return the list stored in the {@code buyouts} field, one entry per item
 */
public List<Buyout> scrapeItemBuyouts(int noOfItems) {
    buyouts = new ArrayList<Buyout>(noOfItems);
    for (int index = 0; index < noOfItems; index++) {
        Buyout found = Buyout.NONE;
        Node cursor = doc.select("#item-fragment-" + index).first();
        Element nextItem = doc.select("#item-fragment-" + (index + 1)).first();

        if (cursor == null) {
            logger.severe(
                    "Actual item in the OP was not found. Buyout will be defaulted to NONE. Item index is "
                            + index);
        } else {
            while (!cursor.equals(nextItem)) {
                cursor = cursor.nextSibling();
                if (cursor == null) {
                    // no buyout set and the end of the spoiler has been reached
                    break;
                }
                if (cursor instanceof Element) {
                    continue;
                }

                String boRaw = StringUtils.trim(cursor.toString());
                String[] tokens = StringUtils.split(boRaw);
                if (tokens.length != 3) {
                    continue;
                }
                if (BuyoutMode.parse(tokens[0]) == BuyoutMode.unknown) {
                    continue;
                }
                found = new Buyout(boRaw);
                break;
            }
        }
        buyouts.add(found);
    }

    return buyouts;
}

From source file:com.screenslicer.core.scrape.type.ComparableNode.java

/**
 * Builds the comparison feature vector ({@code scores}) for {@code node}.
 *
 * <p>The constructor makes one pass over the node's direct children, skipping those that
 * {@code Util.isEmpty} rejects, and gathers per-child tallies and descriptive statistics. It
 * then takes a {@code NodeCounter} over the whole child list, counts the node's non-empty
 * siblings, and packs everything into the {@code scores} array. The position of each entry in
 * that array is significant; do not reorder entries.
 *
 * <p>NOTE(review): {@code node.parent()} is dereferenced without a null check, so this
 * presumably must not be called with a parentless node -- confirm against callers.
 *
 * @param node the node to describe; stored in {@code this.node}
 */
public ComparableNode(final Node node) {
    this.node = node;
    List<Node> separated = node.childNodes();
    // tallies over the non-empty direct children
    int children = 0;
    int childBlocks = 0;
    int childFormatting = 0;
    int childContent = 0;
    int childItems = 0;
    int childDecoration = 0;
    // children that contain at least one anchor / field / both
    int anchorChildren = 0;
    int textChildren = 0;
    int anchorTextChildren = 0;
    // the same three tallies, restricted to children that Util.isItem accepts
    int anchorChildItems = 0;
    int textChildItems = 0;
    int anchorTextChildItems = 0;
    int itemChars = 0;
    int itemAnchorChars = 0;
    // tag bookkeeping used for the consistency and diff scores
    List<String> firstChildTags = null;
    List<List<String>> orderedTags = new ArrayList<List<String>>();
    List<String> allChildTags = new ArrayList<String>();
    ArrayList<List<String>> childTags = new ArrayList<List<String>>();
    boolean childrenConsistent = true;
    String childName = null;
    boolean childrenSame = true;
    // used as a running sum inside the loop; divided by the child count afterwards
    double avgChildLengthDouble = 0d;
    int nodeStrLen = Util.trimmedLen(node.toString());
    // one statistics accumulator per NodeCounter measure, fed once per non-empty child
    DescriptiveStatistics statAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statChars = new DescriptiveStatistics();
    DescriptiveStatistics statDescendants = new DescriptiveStatistics();
    DescriptiveStatistics statFields = new DescriptiveStatistics();
    DescriptiveStatistics statLevels = new DescriptiveStatistics();
    DescriptiveStatistics statLongestField = new DescriptiveStatistics();
    DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statTextAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statStrLen = new DescriptiveStatistics();
    // these two are only fed for children that Util.isItem accepts
    DescriptiveStatistics statItemChars = new DescriptiveStatistics();
    DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics();
    for (Node child : separated) {
        if (!Util.isEmpty(child)) {
            children++;
            int childStrLen = Util.trimmedLen(child.toString());
            avgChildLengthDouble += childStrLen;
            NodeCounter counter = new NodeCounter(child);
            // classify the child by node name; the categories are checked independently
            if (Util.isItem(child.nodeName())) {
                ++childItems;
                anchorChildItems += counter.anchors() > 0 ? 1 : 0;
                textChildItems += counter.fields() > 0 ? 1 : 0;
                anchorTextChildItems += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;
                itemChars += counter.chars();
                itemAnchorChars += counter.anchorChars();
                statItemChars.addValue(counter.chars());
                statItemAnchorChars.addValue(counter.anchorChars());
            }
            if (Util.isBlock(child.nodeName())) {
                ++childBlocks;
            }
            if (Util.isDecoration(child.nodeName())) {
                ++childDecoration;
            }
            if (Util.isFormatting(child.nodeName())) {
                ++childFormatting;
            }
            if (Util.isContent(child)) {
                ++childContent;
            }

            anchorChildren += counter.anchors() > 0 ? 1 : 0;
            textChildren += counter.fields() > 0 ? 1 : 0;
            anchorTextChildren += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;

            statAnchorChars.addValue(counter.anchorChars());
            statAnchors.addValue(counter.anchors());
            statChars.addValue(counter.chars());
            statDescendants.addValue(counter.descendants());
            statFields.addValue(counter.fields());
            statLevels.addValue(counter.levels());
            statLongestField.addValue(counter.longestField());
            statNonAnchorChars.addValue(counter.nonAnchorChars());
            statTextAnchors.addValue(counter.textAnchors());
            statStrLen.addValue(childStrLen);

            List<String> curChildTags = counter.tags();
            allChildTags = Util.join(allChildTags, curChildTags);
            childTags.add(curChildTags);
            // childrenConsistent stays true only while every child's tags match the first child's
            if (firstChildTags == null) {
                firstChildTags = curChildTags;
            } else if (childrenConsistent && !Util.isSame(firstChildTags, curChildTags)) {
                childrenConsistent = false;
            }

            // childrenSame stays true only while every child has the first child's node name
            if (childName == null) {
                childName = child.nodeName();
            } else if (childrenSame && !childName.equals(child.nodeName())) {
                childrenSame = false;
            }

            // record each ordered tag sequence once
            if (!Util.contains(counter.orderedTags(), orderedTags)) {
                orderedTags.add(counter.orderedTags());
            }
        }
    }
    // turn the running sum into a mean; zero when there were no non-empty children
    avgChildLengthDouble = children == 0 ? 0 : avgChildLengthDouble / (double) children;
    int avgChildLength = (int) avgChildLengthDouble;
    // size difference between the joined tag list and each child's own tag list
    double avgChildDiff = 0;
    int maxChildDiff = 0;
    for (List<String> tagList : childTags) {
        avgChildDiff += allChildTags.size() - tagList.size();
        maxChildDiff = Math.max(maxChildDiff, allChildTags.size() - tagList.size());
    }
    avgChildDiff = childTags.size() == 0 ? 0 : avgChildDiff / (double) childTags.size();

    // a missing or empty first tag list never counts as consistent
    childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent;

    // from here on, counter measures the whole child list rather than a single child
    NodeCounter counter = new NodeCounter(separated);
    // counts the parent's non-empty children (this includes node itself if it is non-empty)
    int siblings = 0;
    for (Node sibling : node.parent().childNodes()) {
        if (!Util.isEmpty(sibling)) {
            siblings++;
        }
    }
    // feature vector: entry order is part of the contract with whatever consumes scores
    this.scores = new int[] { counter.items(), counter.blocks(), counter.decoration(), counter.formatting(),
            counter.content(), div(counter.items(), children), div(counter.blocks(), children),
            div(counter.decoration(), children), div(counter.formatting(), children),
            div(counter.content(), children),

            childItems, childBlocks, childDecoration, childFormatting, childContent, avgChildLength,

            counter.fields(), textChildItems, counter.images(), counter.anchors(), counter.textAnchors(),
            div(counter.chars(), Math.max(1, counter.fields())), div(itemChars, Math.max(1, textChildItems)),

            counter.longestField(), nodeStrLen, div(nodeStrLen, children), counter.anchorLen(), counter.chars(),
            itemChars, div(counter.chars(), children), div(itemChars, childItems), counter.nonAnchorChars(),
            div(counter.nonAnchorChars(), children), div(counter.nonAnchorChars(), childItems),
            div(counter.nonAnchorChars(), childBlocks), div(counter.nonAnchorChars(), childContent),
            div(counter.nonAnchorChars(), counter.anchors()),
            div(counter.nonAnchorChars(), counter.textAnchors()), counter.anchorChars(), itemAnchorChars,
            div(itemAnchorChars, anchorChildItems), div(counter.anchorChars(), counter.anchors()),
            div(counter.anchorChars(), counter.textAnchors()), div(counter.anchorChars(), children),

            counter.descendants(), counter.levels(), div(counter.descendants(), children),
            div(children, counter.levels()), siblings, children,

            maxChildDiff, toInt(avgChildDiff), toInt(childrenSame), toInt(childrenConsistent),
            orderedTags.size(),

            mod0(children, RESULT_GROUP_LARGE), mod0(children, RESULT_GROUP_SMALL),
            distance(children, RESULT_GROUP_LARGE), distance(children, RESULT_GROUP_SMALL),
            mod0(childItems, RESULT_GROUP_LARGE), mod0(childItems, RESULT_GROUP_SMALL),
            distance(childItems, RESULT_GROUP_LARGE), distance(childItems, RESULT_GROUP_SMALL),
            mod0(childBlocks, RESULT_GROUP_LARGE), mod0(childBlocks, RESULT_GROUP_SMALL),
            distance(childBlocks, RESULT_GROUP_LARGE), distance(childBlocks, RESULT_GROUP_SMALL),
            mod0(childContent, RESULT_GROUP_LARGE), mod0(childContent, RESULT_GROUP_SMALL),
            distance(childContent, RESULT_GROUP_LARGE), distance(childContent, RESULT_GROUP_SMALL),
            mod0(counter.anchors(), RESULT_GROUP_LARGE), mod0(counter.anchors(), RESULT_GROUP_SMALL),
            distance(counter.anchors(), RESULT_GROUP_LARGE), distance(counter.anchors(), RESULT_GROUP_SMALL),
            mod0(anchorChildItems, RESULT_GROUP_LARGE), mod0(anchorChildItems, RESULT_GROUP_SMALL),
            distance(anchorChildItems, RESULT_GROUP_LARGE), distance(anchorChildItems, RESULT_GROUP_SMALL),
            mod0(textChildItems, RESULT_GROUP_LARGE), mod0(textChildItems, RESULT_GROUP_SMALL),
            distance(textChildItems, RESULT_GROUP_LARGE), distance(textChildItems, RESULT_GROUP_SMALL),
            mod0(counter.textAnchors(), RESULT_GROUP_LARGE), mod0(counter.textAnchors(), RESULT_GROUP_SMALL),
            distance(counter.textAnchors(), RESULT_GROUP_LARGE),
            distance(counter.textAnchors(), RESULT_GROUP_SMALL),

            Math.abs(children - counter.anchors()), Math.abs(childItems - counter.anchors()),
            evenlyDivisible(children, counter.anchors()), evenlyDivisible(childItems, counter.anchors()),
            smallestMod(children, counter.anchors()), smallestMod(childItems, counter.anchors()),

            Math.abs(children - counter.textAnchors()), Math.abs(childItems - counter.textAnchors()),
            Math.abs(children - anchorChildren), Math.abs(childItems - anchorChildItems),
            Math.abs(children - textChildren), Math.abs(childItems - textChildItems),
            Math.abs(children - anchorTextChildren), Math.abs(childItems - anchorTextChildItems),
            evenlyDivisible(children, counter.textAnchors()),
            evenlyDivisible(childItems, counter.textAnchors()), evenlyDivisible(children, anchorChildren),
            evenlyDivisible(childItems, anchorChildItems), evenlyDivisible(children, textChildren),
            evenlyDivisible(childItems, textChildItems), evenlyDivisible(children, anchorTextChildren),
            evenlyDivisible(childItems, anchorTextChildItems), smallestMod(children, counter.textAnchors()),
            smallestMod(childItems, counter.textAnchors()), smallestMod(children, anchorChildren),
            smallestMod(childItems, anchorChildItems), smallestMod(children, textChildren),
            smallestMod(childItems, textChildItems), smallestMod(children, anchorTextChildren),
            smallestMod(childItems, anchorTextChildItems),

            Math.abs(anchorChildren - anchorChildItems), Math.abs(textChildren - textChildItems),
            Math.abs(anchorTextChildren - anchorTextChildItems),

            toInt(statAnchorChars.getSkewness()), toInt(statAnchorChars.getStandardDeviation()),
            toInt(statAnchorChars.getMean()), toInt(statAnchors.getSkewness()),
            toInt(statAnchors.getStandardDeviation()), toInt(statAnchors.getMean()),
            toInt(statChars.getSkewness()), toInt(statChars.getStandardDeviation()), toInt(statChars.getMean()),
            toInt(statDescendants.getSkewness()), toInt(statDescendants.getStandardDeviation()),
            toInt(statDescendants.getMean()), toInt(statFields.getSkewness()),
            toInt(statFields.getStandardDeviation()), toInt(statFields.getMean()),
            toInt(statLevels.getSkewness()), toInt(statLevels.getStandardDeviation()),
            toInt(statLevels.getMean()), toInt(statLongestField.getSkewness()),
            toInt(statLongestField.getStandardDeviation()), toInt(statLongestField.getMean()),
            toInt(statNonAnchorChars.getSkewness()), toInt(statNonAnchorChars.getStandardDeviation()),
            toInt(statNonAnchorChars.getMean()), toInt(statStrLen.getSkewness()),
            toInt(statStrLen.getStandardDeviation()), toInt(statStrLen.getMean()),
            toInt(statTextAnchors.getSkewness()), toInt(statTextAnchors.getStandardDeviation()),
            toInt(statTextAnchors.getMean()), toInt(statItemChars.getSkewness()),
            toInt(statItemChars.getStandardDeviation()), toInt(statItemChars.getMean()),
            toInt(statItemAnchorChars.getSkewness()), toInt(statItemAnchorChars.getStandardDeviation()),
            toInt(statItemAnchorChars.getMean()), };
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Prints {@code node} to {@code out} with its entities unescaped.
 *
 * <p>The owning document's output settings are first switched to the xhtml escape mode, XML
 * syntax and no pretty printing. A text node is then written as its whole text; any other node
 * is written as its string form passed through {@code Parser.unescapeEntities}.
 *
 * @param out  writer that receives the output
 * @param node node to print
 */
private static void unescapeEntity(final PrintWriter out, final Node node) {

    node.ownerDocument().outputSettings().escapeMode(Entities.EscapeMode.xhtml)
            .syntax(Document.OutputSettings.Syntax.xml).prettyPrint(false);

    if (node instanceof TextNode) {
        // text nodes are emitted verbatim, without going through the serializer
        out.print(((TextNode) node).getWholeText());
    } else {
        out.print(Parser.unescapeEntities(node.toString(), true));
    }
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Wraps the content of a text node in a CDATA section.
 *
 * <p>Nodes whose string form is blank, or whose whole text already starts with
 * {@code START_CDATA}, are left untouched.
 *
 * @param node the node to enhance; it is cast to {@code TextNode} unconditionally
 */
private static void enhanceTextNode(final Node node) {

    final TextNode target = (TextNode) node;
    final String rawText = target.getWholeText();
    final String renderedText = node.toString();

    // skip blank nodes, and never add CDATA a second time
    final boolean blank = renderedText.trim().isEmpty();
    if (blank || rawText.startsWith(START_CDATA)) {
        return;
    }

    final String aligned = alignTextWithWholeText(rawText, renderedText);
    target.text(START_CDATA + aligned + END_CDATA);
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Attaches documentation to declarations whose name appears as the text of a table cell.
 *
 * <p>When the trimmed text of a text node is a key of {@code members}, the node's parent, or
 * failing that one of the next two ancestors, must be a {@code td}. If that cell is the first
 * of exactly three {@code td} siblings, the third cell's string form (minus its first four and
 * last five characters) is stripped of tags, wrapped in doc-comment delimiters and set as the
 * documentation of every declaration registered under that text.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the traversal (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    String text = ((TextNode) node).text().trim();
    if (!members.containsKey(text)) {
        return;
    }

    // climb at most two levels above the direct parent looking for the enclosing td
    Node cell = node.parent();
    for (int hop = 0; hop < 2 && cell != null && !"td".equals(cell.nodeName()); hop++) {
        cell = cell.parent();
    }
    if (cell == null || !"td".equals(cell.nodeName())) {
        return;
    }

    List<Node> rowCells = new ArrayList<Node>();
    for (Node sibling : cell.parent().childNodes()) {
        if ("td".equals(sibling.nodeName())) {
            rowCells.add(sibling);
        }
    }
    if (rowCells.get(0) != cell || rowCells.size() != 3) {
        return;
    }

    String cellHtml = rowCells.get(2).toString();
    // drops the first 4 and last 5 characters of the cell markup before removing tags
    String inner = cellHtml.substring(4, cellHtml.length() - 5);
    String doc = "/** " + DocFiller.removeTags(inner) + " */";
    for (Declaration declaration : members.get(text)) {
        declaration.setDocumentation(doc);
    }
    docFiller.countDoc(false);
}