Example usage for org.jsoup.nodes Node parent

List of usage examples for org.jsoup.nodes Node parent

Introduction

On this page you can find example usages of the org.jsoup.nodes.Node method parent().

Prototype

public Node parent() 

Source Link

Document

Gets this node's parent node.

Usage

From source file:com.screenslicer.core.scrape.Dissect.java

/**
 * Builds an identifying string for a node from its position in the tree.
 *
 * <p>The string records, in order: the node and each of its ancestors up to
 * the root, each direct child of the node, each entry of {@code nodes} (when
 * non-null), and finally the hash code of the node's outer HTML. For every
 * recorded node the hash code, node name, child count and sibling index are
 * written, separated by marker tokens.
 *
 * @param node         the node to describe; must not be null
 * @param nodes        additional nodes to include in the string, or null for none
 * @param lenientUrl   flag embedded verbatim in the returned prefix
 * @param lenientTitle flag embedded verbatim in the returned prefix
 * @return the string {@code "dissectedResults-<<lenientUrl>>-<<lenientTitle>>-"}
 *         followed by the accumulated description
 */
public static String nodeHash(Node node, List<Node> nodes, boolean lenientUrl, boolean lenientTitle) {
    StringBuilder key = new StringBuilder();

    // The node itself, then every ancestor up to the root.
    for (Node ancestor = node; ancestor != null; ancestor = ancestor.parent()) {
        key.append("<<0>>").append(ancestor.hashCode())
                .append("<<1>>").append(ancestor.nodeName())
                .append("<<2>>").append(ancestor.childNodes().size())
                .append("<<3>>").append(ancestor.siblingIndex())
                .append("<<4>>");
    }

    // Direct children of the node.
    for (Node kid : node.childNodes()) {
        key.append("<<c0>>").append(kid.hashCode())
                .append("<<c1>>").append(kid.nodeName())
                .append("<<c2>>").append(kid.childNodes().size())
                .append("<<c3>>").append(kid.siblingIndex())
                .append("<<c4>>");
    }

    // Caller-supplied extra nodes, if any.
    if (nodes != null) {
        for (Node extra : nodes) {
            key.append("<<s0>>").append(extra.hashCode())
                    .append("<<s1>>").append(extra.nodeName())
                    .append("<<s2>>").append(extra.childNodes().size())
                    .append("<<s3>>").append(extra.siblingIndex())
                    .append("<<s4>>");
        }
    }

    key.append("<<>>").append(Util.outerHtml(node).hashCode());
    return "dissectedResults-<<" + lenientUrl + ">>-<<" + lenientTitle + ">>-" + key.toString();
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Counts how many levels up the tree the closest ancestor accepted by
 * {@code isProximityBlock} is.
 *
 * @param node the node whose ancestors are inspected; must not be null
 * @return 1 if the direct parent matches, 2 for the grandparent, and so on;
 *         {@link Integer#MAX_VALUE} if no ancestor matches
 */
public static int nearestBlock(Node node) {
    int distance = 1;
    for (Node ancestor = node.parent(); ancestor != null; ancestor = ancestor.parent()) {
        if (isProximityBlock(ancestor.nodeName())) {
            return distance;
        }
        distance++;
    }
    return Integer.MAX_VALUE;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Strips every match of {@code hiddenMarker} from the {@code class} attribute
 * of the given node and of each of its ancestors up to the root. When a
 * visited node is a {@code select}, its direct children are stripped as well.
 *
 * @param node the starting node; null is accepted and does nothing
 */
private static void markVisible(Node node) {
    for (Node current = node; current != null; current = current.parent()) {
        if (current.nodeName().equals("select")) {
            for (Node option : current.childNodes()) {
                String optionClass = option.attr("class");
                option.attr("class", hiddenMarker.matcher(optionClass).replaceAll(""));
            }
        }
        String currentClass = current.attr("class");
        current.attr("class", hiddenMarker.matcher(currentClass).replaceAll(""));
    }
}

From source file:com.screenslicer.core.scrape.type.ComparableNode.java

/**
 * Creates a comparable wrapper around {@code node} and computes its
 * {@code scores} array.
 *
 * <p>The constructor makes one pass over the node's direct children that are
 * not empty according to {@code Util.isEmpty}, gathering per-child counts and
 * statistics, then combines them with a {@code NodeCounter} built over all
 * child nodes to fill {@code this.scores}. The position of each value in the
 * array is significant, so entries must not be reordered.
 *
 * @param node the node to score; its parent is dereferenced without a null
 *             check, so it presumably must not be a root node (confirm
 *             against callers)
 */
public ComparableNode(final Node node) {
    this.node = node;
    List<Node> separated = node.childNodes();
    // Per-child tallies, filled by the loop over non-empty children below.
    int children = 0;
    int childBlocks = 0;
    int childFormatting = 0;
    int childContent = 0;
    int childItems = 0;
    int childDecoration = 0;
    int anchorChildren = 0;
    int textChildren = 0;
    int anchorTextChildren = 0;
    int anchorChildItems = 0;
    int textChildItems = 0;
    int anchorTextChildItems = 0;
    int itemChars = 0;
    int itemAnchorChars = 0;
    // Tag bookkeeping used for the consistency and difference metrics.
    List<String> firstChildTags = null;
    List<List<String>> orderedTags = new ArrayList<List<String>>();
    List<String> allChildTags = new ArrayList<String>();
    ArrayList<List<String>> childTags = new ArrayList<List<String>>();
    boolean childrenConsistent = true;
    String childName = null;
    boolean childrenSame = true;
    // Accumulates the sum of child string lengths; divided by the child count after the loop.
    double avgChildLengthDouble = 0d;
    int nodeStrLen = Util.trimmedLen(node.toString());
    // One statistics accumulator per NodeCounter metric, fed once per non-empty child.
    DescriptiveStatistics statAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statChars = new DescriptiveStatistics();
    DescriptiveStatistics statDescendants = new DescriptiveStatistics();
    DescriptiveStatistics statFields = new DescriptiveStatistics();
    DescriptiveStatistics statLevels = new DescriptiveStatistics();
    DescriptiveStatistics statLongestField = new DescriptiveStatistics();
    DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statTextAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statStrLen = new DescriptiveStatistics();
    // These two are fed only by children whose node name satisfies Util.isItem.
    DescriptiveStatistics statItemChars = new DescriptiveStatistics();
    DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics();
    for (Node child : separated) {
        if (!Util.isEmpty(child)) {
            children++;
            int childStrLen = Util.trimmedLen(child.toString());
            avgChildLengthDouble += childStrLen;
            NodeCounter counter = new NodeCounter(child);
            if (Util.isItem(child.nodeName())) {
                ++childItems;
                anchorChildItems += counter.anchors() > 0 ? 1 : 0;
                textChildItems += counter.fields() > 0 ? 1 : 0;
                anchorTextChildItems += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;
                itemChars += counter.chars();
                itemAnchorChars += counter.anchorChars();
                statItemChars.addValue(counter.chars());
                statItemAnchorChars.addValue(counter.anchorChars());
            }
            // The category checks are independent, so one child may increment several tallies.
            if (Util.isBlock(child.nodeName())) {
                ++childBlocks;
            }
            if (Util.isDecoration(child.nodeName())) {
                ++childDecoration;
            }
            if (Util.isFormatting(child.nodeName())) {
                ++childFormatting;
            }
            if (Util.isContent(child)) {
                ++childContent;
            }

            anchorChildren += counter.anchors() > 0 ? 1 : 0;
            textChildren += counter.fields() > 0 ? 1 : 0;
            anchorTextChildren += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;

            statAnchorChars.addValue(counter.anchorChars());
            statAnchors.addValue(counter.anchors());
            statChars.addValue(counter.chars());
            statDescendants.addValue(counter.descendants());
            statFields.addValue(counter.fields());
            statLevels.addValue(counter.levels());
            statLongestField.addValue(counter.longestField());
            statNonAnchorChars.addValue(counter.nonAnchorChars());
            statTextAnchors.addValue(counter.textAnchors());
            statStrLen.addValue(childStrLen);

            List<String> curChildTags = counter.tags();
            allChildTags = Util.join(allChildTags, curChildTags);
            childTags.add(curChildTags);
            // Compare every later child's tags against the first non-empty child's tags.
            if (firstChildTags == null) {
                firstChildTags = curChildTags;
            } else if (childrenConsistent && !Util.isSame(firstChildTags, curChildTags)) {
                childrenConsistent = false;
            }

            // Likewise, check whether all non-empty children share one node name.
            if (childName == null) {
                childName = child.nodeName();
            } else if (childrenSame && !childName.equals(child.nodeName())) {
                childrenSame = false;
            }

            // Collect each ordered tag list only once.
            if (!Util.contains(counter.orderedTags(), orderedTags)) {
                orderedTags.add(counter.orderedTags());
            }
        }
    }
    // Turn the accumulated sum into a mean, guarding against division by zero.
    avgChildLengthDouble = children == 0 ? 0 : avgChildLengthDouble / (double) children;
    int avgChildLength = (int) avgChildLengthDouble;
    // For each child, measure how many entries of the joined tag list its own tag list lacks in size.
    double avgChildDiff = 0;
    int maxChildDiff = 0;
    for (List<String> tagList : childTags) {
        avgChildDiff += allChildTags.size() - tagList.size();
        maxChildDiff = Math.max(maxChildDiff, allChildTags.size() - tagList.size());
    }
    avgChildDiff = childTags.size() == 0 ? 0 : avgChildDiff / (double) childTags.size();

    // Consistency additionally requires that the first child actually had tags.
    childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent;

    // Aggregate counter over all child nodes (shadows nothing: the loop-local counter is out of scope).
    NodeCounter counter = new NodeCounter(separated);
    // Count the non-empty children of the parent; this includes the node itself when it is non-empty.
    // NOTE(review): node.parent() is not null-checked here, so a parentless node would throw - confirm callers never pass one.
    int siblings = 0;
    for (Node sibling : node.parent().childNodes()) {
        if (!Util.isEmpty(sibling)) {
            siblings++;
        }
    }
    // Positional score vector; the order of entries below must be preserved.
    this.scores = new int[] { counter.items(), counter.blocks(), counter.decoration(), counter.formatting(),
            counter.content(), div(counter.items(), children), div(counter.blocks(), children),
            div(counter.decoration(), children), div(counter.formatting(), children),
            div(counter.content(), children),

            childItems, childBlocks, childDecoration, childFormatting, childContent, avgChildLength,

            counter.fields(), textChildItems, counter.images(), counter.anchors(), counter.textAnchors(),
            div(counter.chars(), Math.max(1, counter.fields())), div(itemChars, Math.max(1, textChildItems)),

            counter.longestField(), nodeStrLen, div(nodeStrLen, children), counter.anchorLen(), counter.chars(),
            itemChars, div(counter.chars(), children), div(itemChars, childItems), counter.nonAnchorChars(),
            div(counter.nonAnchorChars(), children), div(counter.nonAnchorChars(), childItems),
            div(counter.nonAnchorChars(), childBlocks), div(counter.nonAnchorChars(), childContent),
            div(counter.nonAnchorChars(), counter.anchors()),
            div(counter.nonAnchorChars(), counter.textAnchors()), counter.anchorChars(), itemAnchorChars,
            div(itemAnchorChars, anchorChildItems), div(counter.anchorChars(), counter.anchors()),
            div(counter.anchorChars(), counter.textAnchors()), div(counter.anchorChars(), children),

            counter.descendants(), counter.levels(), div(counter.descendants(), children),
            div(children, counter.levels()), siblings, children,

            maxChildDiff, toInt(avgChildDiff), toInt(childrenSame), toInt(childrenConsistent),
            orderedTags.size(),

            mod0(children, RESULT_GROUP_LARGE), mod0(children, RESULT_GROUP_SMALL),
            distance(children, RESULT_GROUP_LARGE), distance(children, RESULT_GROUP_SMALL),
            mod0(childItems, RESULT_GROUP_LARGE), mod0(childItems, RESULT_GROUP_SMALL),
            distance(childItems, RESULT_GROUP_LARGE), distance(childItems, RESULT_GROUP_SMALL),
            mod0(childBlocks, RESULT_GROUP_LARGE), mod0(childBlocks, RESULT_GROUP_SMALL),
            distance(childBlocks, RESULT_GROUP_LARGE), distance(childBlocks, RESULT_GROUP_SMALL),
            mod0(childContent, RESULT_GROUP_LARGE), mod0(childContent, RESULT_GROUP_SMALL),
            distance(childContent, RESULT_GROUP_LARGE), distance(childContent, RESULT_GROUP_SMALL),
            mod0(counter.anchors(), RESULT_GROUP_LARGE), mod0(counter.anchors(), RESULT_GROUP_SMALL),
            distance(counter.anchors(), RESULT_GROUP_LARGE), distance(counter.anchors(), RESULT_GROUP_SMALL),
            mod0(anchorChildItems, RESULT_GROUP_LARGE), mod0(anchorChildItems, RESULT_GROUP_SMALL),
            distance(anchorChildItems, RESULT_GROUP_LARGE), distance(anchorChildItems, RESULT_GROUP_SMALL),
            mod0(textChildItems, RESULT_GROUP_LARGE), mod0(textChildItems, RESULT_GROUP_SMALL),
            distance(textChildItems, RESULT_GROUP_LARGE), distance(textChildItems, RESULT_GROUP_SMALL),
            mod0(counter.textAnchors(), RESULT_GROUP_LARGE), mod0(counter.textAnchors(), RESULT_GROUP_SMALL),
            distance(counter.textAnchors(), RESULT_GROUP_LARGE),
            distance(counter.textAnchors(), RESULT_GROUP_SMALL),

            Math.abs(children - counter.anchors()), Math.abs(childItems - counter.anchors()),
            evenlyDivisible(children, counter.anchors()), evenlyDivisible(childItems, counter.anchors()),
            smallestMod(children, counter.anchors()), smallestMod(childItems, counter.anchors()),

            Math.abs(children - counter.textAnchors()), Math.abs(childItems - counter.textAnchors()),
            Math.abs(children - anchorChildren), Math.abs(childItems - anchorChildItems),
            Math.abs(children - textChildren), Math.abs(childItems - textChildItems),
            Math.abs(children - anchorTextChildren), Math.abs(childItems - anchorTextChildItems),
            evenlyDivisible(children, counter.textAnchors()),
            evenlyDivisible(childItems, counter.textAnchors()), evenlyDivisible(children, anchorChildren),
            evenlyDivisible(childItems, anchorChildItems), evenlyDivisible(children, textChildren),
            evenlyDivisible(childItems, textChildItems), evenlyDivisible(children, anchorTextChildren),
            evenlyDivisible(childItems, anchorTextChildItems), smallestMod(children, counter.textAnchors()),
            smallestMod(childItems, counter.textAnchors()), smallestMod(children, anchorChildren),
            smallestMod(childItems, anchorChildItems), smallestMod(children, textChildren),
            smallestMod(childItems, textChildItems), smallestMod(children, anchorTextChildren),
            smallestMod(childItems, anchorTextChildItems),

            Math.abs(anchorChildren - anchorChildItems), Math.abs(textChildren - textChildItems),
            Math.abs(anchorTextChildren - anchorTextChildItems),

            toInt(statAnchorChars.getSkewness()), toInt(statAnchorChars.getStandardDeviation()),
            toInt(statAnchorChars.getMean()), toInt(statAnchors.getSkewness()),
            toInt(statAnchors.getStandardDeviation()), toInt(statAnchors.getMean()),
            toInt(statChars.getSkewness()), toInt(statChars.getStandardDeviation()), toInt(statChars.getMean()),
            toInt(statDescendants.getSkewness()), toInt(statDescendants.getStandardDeviation()),
            toInt(statDescendants.getMean()), toInt(statFields.getSkewness()),
            toInt(statFields.getStandardDeviation()), toInt(statFields.getMean()),
            toInt(statLevels.getSkewness()), toInt(statLevels.getStandardDeviation()),
            toInt(statLevels.getMean()), toInt(statLongestField.getSkewness()),
            toInt(statLongestField.getStandardDeviation()), toInt(statLongestField.getMean()),
            toInt(statNonAnchorChars.getSkewness()), toInt(statNonAnchorChars.getStandardDeviation()),
            toInt(statNonAnchorChars.getMean()), toInt(statStrLen.getSkewness()),
            toInt(statStrLen.getStandardDeviation()), toInt(statStrLen.getMean()),
            toInt(statTextAnchors.getSkewness()), toInt(statTextAnchors.getStandardDeviation()),
            toInt(statTextAnchors.getMean()), toInt(statItemChars.getSkewness()),
            toInt(statItemChars.getStandardDeviation()), toInt(statItemChars.getMean()),
            toInt(statItemAnchorChars.getSkewness()), toInt(statItemAnchorChars.getStandardDeviation()),
            toInt(statItemAnchorChars.getMean()), };
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Tells whether {@code node} is the last child of its parent that is not a
 * {@link TextNode}.
 *
 * <p>Returns {@code false} instead of throwing when the node has no parent
 * (for example the document root, which callers climbing the tree with
 * {@code node = node.parent()} can reach) or when the parent has only text
 * children.
 *
 * @param node the node to test; must not be null
 * @return {@code true} if no non-text sibling follows {@code node} and
 *         {@code node} itself is not a text node; {@code false} otherwise
 */
static boolean isLastSiblingTag(Node node) {
    Node parent = node.parent();
    if (parent == null) {
        // A parentless node has no siblings to be last among.
        return false;
    }
    List<Node> children = parent.childNodes();
    // Scan from the end: the first non-text child found is the last tag sibling.
    for (int i = children.size() - 1; i >= 0; i--) {
        Node candidate = children.get(i);
        if (!(candidate instanceof TextNode)) {
            return candidate == node;
        }
    }
    // Only text children exist, so there is no last tag at all.
    return false;
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Visitor callback that documents members listed in a three-column table row.
 *
 * <p>When the visited node is a text node whose trimmed text is a key of
 * {@code members}, the enclosing {@code td} is searched among the text node's
 * parent, grandparent and great-grandparent. If that cell is the first of
 * exactly three {@code td} siblings, the third cell's markup (with its first
 * 4 and last 5 characters cut off and tags removed) becomes the documentation
 * of every declaration registered under that text.
 *
 * @param node  the node being visited
 * @param depth the traversal depth; not used
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    String memberName = ((TextNode) node).text().trim();
    if (!members.containsKey(memberName)) {
        return;
    }

    // Climb at most two levels above the direct parent looking for the cell.
    Node cell = node.parent();
    for (int hops = 0; hops < 2 && cell != null && !"td".equals(cell.nodeName()); hops++) {
        cell = cell.parent();
    }
    if (cell == null || !"td".equals(cell.nodeName())) {
        return;
    }

    List<Node> rowCells = new ArrayList<Node>();
    for (Node sibling : cell.parent().childNodes()) {
        if ("td".equals(sibling.nodeName())) {
            rowCells.add(sibling);
        }
    }
    if (rowCells.get(0) != cell || rowCells.size() != 3) {
        return;
    }

    String cellHtml = rowCells.get(2).toString();
    String doc = "/** " + DocFiller.removeTags(cellHtml.substring(4, cellHtml.length() - 5)) + " */";
    for (Declaration declaration : members.get(memberName)) {
        declaration.setDocumentation(doc);
    }
    docFiller.countDoc(false);
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Visitor callback that documents members listed in a definition list.
 *
 * <p>When the visited node is a text node whose trimmed text is a key of
 * {@code members}, the enclosing {@code dt} is searched among the text node's
 * parent, grandparent and great-grandparent. If the {@code dt} is immediately
 * followed (among its {@code dt}/{@code dd} siblings) by a {@code dd}, that
 * element's markup (with its first 4 and last 5 characters cut off and tags
 * removed) becomes the documentation of every declaration registered under
 * that text.
 *
 * @param node  the node being visited
 * @param depth the traversal depth; not used
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    String memberName = ((TextNode) node).text().trim();
    if (!members.containsKey(memberName)) {
        return;
    }

    // Climb at most two levels above the direct parent looking for the term.
    Node term = node.parent();
    for (int hops = 0; hops < 2 && term != null && !"dt".equals(term.nodeName()); hops++) {
        term = term.parent();
    }
    if (term == null || !"dt".equals(term.nodeName())) {
        return;
    }

    List<Node> entries = new ArrayList<Node>();
    for (Node sibling : term.parent().childNodes()) {
        String name = sibling.nodeName();
        if ("dt".equals(name) || "dd".equals(name)) {
            entries.add(sibling);
        }
    }

    // Locate the term by identity, then look at the entry right after it.
    int position = 0;
    while (position < entries.size() && entries.get(position) != term) {
        position++;
    }
    int next = position + 1;
    if (next >= entries.size() || !"dd".equals(entries.get(next).nodeName())) {
        return;
    }

    String definitionHtml = entries.get(next).toString();
    String doc = "/** " + DocFiller.removeTags(definitionHtml.substring(4, definitionHtml.length() - 5))
            + " */";
    for (Declaration declaration : members.get(memberName)) {
        declaration.setDocumentation(doc);
    }
    docFiller.countDoc(false);
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Visitor callback that extracts a type description following the element
 * whose {@code id} is {@code Quick_Links} (compared case-insensitively).
 *
 * <p>Only the first such element is handled. Starting from it, the code moves
 * up the tree while the current node is the last tag among its siblings, then
 * appends the outer HTML of the following siblings to {@code description}
 * until an {@code h2} or {@code h3} is met. If the text gathered so far is
 * blank, collection resumes after the point where it stopped (or restarts
 * from the anchor node when the siblings ran out). The result, with tags
 * removed, is stored as the documentation of {@code typeDeclaration}.
 *
 * @param node  the node being visited
 * @param depth the traversal depth; not used
 */
@Override
public void head(Node node, int depth) {
    if (found) {
        return;
    }
    String id = node.attr("id");
    if (id == null || !id.equalsIgnoreCase("Quick_Links")) {
        return;
    }
    found = true;

    Node anchor = node;
    while (DocFiller.isLastSiblingTag(anchor)) {
        anchor = anchor.parent();
    }

    // First pass: siblings after the anchor, up to the next h2/h3.
    Node cursor = anchor.nextSibling();
    while (cursor != null && !"h2".equals(cursor.nodeName()) && !"h3".equals(cursor.nodeName())) {
        description.append(cursor.outerHtml());
        cursor = cursor.nextSibling();
    }

    String plainText = Jsoup.parse(description.toString()).text();
    if ("".equals(plainText.trim())) {
        // Nothing readable yet: continue past the heading, or start over from the anchor.
        Node resumeFrom = (cursor == null) ? anchor : cursor;
        cursor = resumeFrom.nextSibling();
        while (cursor != null && !"h2".equals(cursor.nodeName()) && !"h3".equals(cursor.nodeName())) {
            description.append(cursor.outerHtml());
            cursor = cursor.nextSibling();
        }
    }

    typeDeclaration.setDocumentation("/** " + DocFiller.removeTags(description.toString()) + " */");
    docFiller.countDoc(true);
}