List of usage examples for the org.jsoup.nodes.Node method parent()
public Node parent()
From source file:com.screenslicer.core.scrape.Dissect.java
public static String nodeHash(Node node, List<Node> nodes, boolean lenientUrl, boolean lenientTitle) { StringBuilder position = new StringBuilder(); Node cur = node; while (cur != null) { position.append("<<0>>"); position.append(cur.hashCode()); position.append("<<1>>"); position.append(cur.nodeName()); position.append("<<2>>"); position.append(cur.childNodes().size()); position.append("<<3>>"); position.append(cur.siblingIndex()); position.append("<<4>>"); cur = cur.parent(); }// ww w . j a va 2 s. c o m for (Node child : node.childNodes()) { position.append("<<c0>>"); position.append(child.hashCode()); position.append("<<c1>>"); position.append(child.nodeName()); position.append("<<c2>>"); position.append(child.childNodes().size()); position.append("<<c3>>"); position.append(child.siblingIndex()); position.append("<<c4>>"); } if (nodes != null) { for (Node n : nodes) { position.append("<<s0>>"); position.append(n.hashCode()); position.append("<<s1>>"); position.append(n.nodeName()); position.append("<<s2>>"); position.append(n.childNodes().size()); position.append("<<s3>>"); position.append(n.siblingIndex()); position.append("<<s4>>"); } } position.append("<<>>"); position.append(Util.outerHtml(node).hashCode()); return "dissectedResults-<<" + lenientUrl + ">>-<<" + lenientTitle + ">>-" + position.toString(); }
From source file:com.screenslicer.core.util.Util.java
public static int nearestBlock(Node node) { int nearest = 0; Node parent = node.parent(); while (parent != null) { ++nearest;//from w w w . j a v a 2s .c o m if (isProximityBlock(parent.nodeName())) { return nearest; } parent = parent.parent(); } return Integer.MAX_VALUE; }
From source file:com.screenslicer.core.util.Util.java
/**
 * Removes every match of the {@code hiddenMarker} pattern from the {@code class} attribute of
 * {@code node} and of each of its ancestors. For nodes named {@code select}, the direct
 * children are cleaned as well (before the select itself).
 *
 * @param node the node to start from; null is a no-op
 */
private static void markVisible(Node node) {
    for (Node current = node; current != null; current = current.parent()) {
        if ("select".equals(current.nodeName())) {
            for (Node option : current.childNodes()) {
                String cleanedChildClass = hiddenMarker.matcher(option.attr("class")).replaceAll("");
                option.attr("class", cleanedChildClass);
            }
        }
        String cleanedClass = hiddenMarker.matcher(current.attr("class")).replaceAll("");
        current.attr("class", cleanedClass);
    }
}
From source file:com.screenslicer.core.scrape.type.ComparableNode.java
/**
 * Computes the integer feature vector {@code scores} for {@code node}.
 *
 * <p>The constructor works in three steps:
 * <ol>
 *   <li>Each direct child that {@code Util.isEmpty} does not reject is measured with its own
 *       {@code NodeCounter}; the results feed per-child tallies (items, blocks, decoration,
 *       formatting, content, anchor/text presence) and one {@code DescriptiveStatistics}
 *       per metric.</li>
 *   <li>The children's tag lists are compared: the size gap between the merged tag list and
 *       each child's list gives {@code avgChildDiff}/{@code maxChildDiff}, and the children
 *       are flagged as "consistent" (same tags as the first child) and "same" (same node
 *       name as the first child).</li>
 *   <li>A {@code NodeCounter} over all direct children, the tallies, the non-empty sibling
 *       count and the skewness/standard deviation/mean of every statistic are flattened
 *       into {@code scores}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code scores} is a positional array, so presumably its consumers depend
 * on the exact entry order -- confirm before reordering or inserting entries.
 *
 * <p>NOTE(review): {@code node.parent()} is dereferenced without a null check when counting
 * siblings, so this looks like it requires a node that has a parent -- confirm with callers.
 *
 * @param node the node to score
 */
public ComparableNode(final Node node) { this.node = node; List<Node> separated = node.childNodes(); int children = 0; int childBlocks = 0; int childFormatting = 0; int childContent = 0; int childItems = 0; int childDecoration = 0; int anchorChildren = 0; int textChildren = 0; int anchorTextChildren = 0; int anchorChildItems = 0; int textChildItems = 0; int anchorTextChildItems = 0; int itemChars = 0; int itemAnchorChars = 0; List<String> firstChildTags = null; List<List<String>> orderedTags = new ArrayList<List<String>>(); List<String> allChildTags = new ArrayList<String>(); ArrayList<List<String>> childTags = new ArrayList<List<String>>(); boolean childrenConsistent = true; String childName = null;/* node name of the first non-empty child; later children are compared against it */ boolean childrenSame = true; double avgChildLengthDouble = 0d; int nodeStrLen = Util.trimmedLen(node.toString()); DescriptiveStatistics statAnchorChars = new DescriptiveStatistics(); DescriptiveStatistics statAnchors = new DescriptiveStatistics(); DescriptiveStatistics statChars = new DescriptiveStatistics(); DescriptiveStatistics statDescendants = new DescriptiveStatistics(); DescriptiveStatistics statFields = new DescriptiveStatistics(); DescriptiveStatistics statLevels = new DescriptiveStatistics(); DescriptiveStatistics statLongestField = new DescriptiveStatistics(); DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics(); DescriptiveStatistics statTextAnchors = new DescriptiveStatistics(); DescriptiveStatistics statStrLen = new DescriptiveStatistics(); DescriptiveStatistics statItemChars = new DescriptiveStatistics(); DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics(); /* Step 1: measure every non-empty direct child. */ for (Node child : separated) { if (!Util.isEmpty(child)) { children++; int childStrLen = Util.trimmedLen(child.toString()); avgChildLengthDouble += childStrLen; NodeCounter counter = new NodeCounter(child); if (Util.isItem(child.nodeName())) { ++childItems; anchorChildItems += counter.anchors() > 0 ? 
1 : 0; textChildItems += counter.fields() > 0 ? 1 : 0; anchorTextChildItems += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0; itemChars += counter.chars(); itemAnchorChars += counter.anchorChars(); statItemChars.addValue(counter.chars()); statItemAnchorChars.addValue(counter.anchorChars()); } if (Util.isBlock(child.nodeName())) { ++childBlocks; } if (Util.isDecoration(child.nodeName())) { ++childDecoration; } if (Util.isFormatting(child.nodeName())) { ++childFormatting; } if (Util.isContent(child)) { ++childContent; } anchorChildren += counter.anchors() > 0 ? 1 : 0; textChildren += counter.fields() > 0 ? 1 : 0; anchorTextChildren += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0; statAnchorChars.addValue(counter.anchorChars()); statAnchors.addValue(counter.anchors()); statChars.addValue(counter.chars()); statDescendants.addValue(counter.descendants()); statFields.addValue(counter.fields()); statLevels.addValue(counter.levels()); statLongestField.addValue(counter.longestField()); statNonAnchorChars.addValue(counter.nonAnchorChars()); statTextAnchors.addValue(counter.textAnchors()); statStrLen.addValue(childStrLen); List<String> curChildTags = counter.tags(); allChildTags = Util.join(allChildTags, curChildTags); childTags.add(curChildTags); if (firstChildTags == null) { firstChildTags = curChildTags; } else if (childrenConsistent && !Util.isSame(firstChildTags, curChildTags)) { childrenConsistent = false; } if (childName == null) { childName = child.nodeName(); } else if (childrenSame && !childName.equals(child.nodeName())) { childrenSame = false; } if (!Util.contains(counter.orderedTags(), orderedTags)) { orderedTags.add(counter.orderedTags()); } } } /* Step 2: averages and tag-list differences; the zero checks avoid dividing by an empty child set. */ avgChildLengthDouble = children == 0 ? 
0 : avgChildLengthDouble / (double) children; int avgChildLength = (int) avgChildLengthDouble; double avgChildDiff = 0; int maxChildDiff = 0; for (List<String> tagList : childTags) { avgChildDiff += allChildTags.size() - tagList.size(); maxChildDiff = Math.max(maxChildDiff, allChildTags.size() - tagList.size()); } avgChildDiff = childTags.size() == 0 ? 0 : avgChildDiff / (double) childTags.size(); childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent; NodeCounter counter = new NodeCounter(separated); /* Count the non-empty child nodes of this node's parent. */ int siblings = 0; for (Node sibling : node.parent().childNodes()) { if (!Util.isEmpty(sibling)) { siblings++; } } /* Step 3: flatten everything into the positional score vector. */ this.scores = new int[] { counter.items(), counter.blocks(), counter.decoration(), counter.formatting(), counter.content(), div(counter.items(), children), div(counter.blocks(), children), div(counter.decoration(), children), div(counter.formatting(), children), div(counter.content(), children), childItems, childBlocks, childDecoration, childFormatting, childContent, avgChildLength, counter.fields(), textChildItems, counter.images(), counter.anchors(), counter.textAnchors(), div(counter.chars(), Math.max(1, counter.fields())), div(itemChars, Math.max(1, textChildItems)), counter.longestField(), nodeStrLen, div(nodeStrLen, children), counter.anchorLen(), counter.chars(), itemChars, div(counter.chars(), children), div(itemChars, childItems), counter.nonAnchorChars(), div(counter.nonAnchorChars(), children), div(counter.nonAnchorChars(), childItems), div(counter.nonAnchorChars(), childBlocks), div(counter.nonAnchorChars(), childContent), div(counter.nonAnchorChars(), counter.anchors()), div(counter.nonAnchorChars(), counter.textAnchors()), counter.anchorChars(), itemAnchorChars, div(itemAnchorChars, anchorChildItems), div(counter.anchorChars(), counter.anchors()), div(counter.anchorChars(), counter.textAnchors()), div(counter.anchorChars(), children), counter.descendants(), counter.levels(), div(counter.descendants(), 
children), div(children, counter.levels()), siblings, children, maxChildDiff, toInt(avgChildDiff), toInt(childrenSame), toInt(childrenConsistent), orderedTags.size(), mod0(children, RESULT_GROUP_LARGE), mod0(children, RESULT_GROUP_SMALL), distance(children, RESULT_GROUP_LARGE), distance(children, RESULT_GROUP_SMALL), mod0(childItems, RESULT_GROUP_LARGE), mod0(childItems, RESULT_GROUP_SMALL), distance(childItems, RESULT_GROUP_LARGE), distance(childItems, RESULT_GROUP_SMALL), mod0(childBlocks, RESULT_GROUP_LARGE), mod0(childBlocks, RESULT_GROUP_SMALL), distance(childBlocks, RESULT_GROUP_LARGE), distance(childBlocks, RESULT_GROUP_SMALL), mod0(childContent, RESULT_GROUP_LARGE), mod0(childContent, RESULT_GROUP_SMALL), distance(childContent, RESULT_GROUP_LARGE), distance(childContent, RESULT_GROUP_SMALL), mod0(counter.anchors(), RESULT_GROUP_LARGE), mod0(counter.anchors(), RESULT_GROUP_SMALL), distance(counter.anchors(), RESULT_GROUP_LARGE), distance(counter.anchors(), RESULT_GROUP_SMALL), mod0(anchorChildItems, RESULT_GROUP_LARGE), mod0(anchorChildItems, RESULT_GROUP_SMALL), distance(anchorChildItems, RESULT_GROUP_LARGE), distance(anchorChildItems, RESULT_GROUP_SMALL), mod0(textChildItems, RESULT_GROUP_LARGE), mod0(textChildItems, RESULT_GROUP_SMALL), distance(textChildItems, RESULT_GROUP_LARGE), distance(textChildItems, RESULT_GROUP_SMALL), mod0(counter.textAnchors(), RESULT_GROUP_LARGE), mod0(counter.textAnchors(), RESULT_GROUP_SMALL), distance(counter.textAnchors(), RESULT_GROUP_LARGE), distance(counter.textAnchors(), RESULT_GROUP_SMALL), Math.abs(children - counter.anchors()), Math.abs(childItems - counter.anchors()), evenlyDivisible(children, counter.anchors()), evenlyDivisible(childItems, counter.anchors()), smallestMod(children, counter.anchors()), smallestMod(childItems, counter.anchors()), Math.abs(children - counter.textAnchors()), Math.abs(childItems - counter.textAnchors()), Math.abs(children - anchorChildren), Math.abs(childItems - anchorChildItems), 
Math.abs(children - textChildren), Math.abs(childItems - textChildItems), Math.abs(children - anchorTextChildren), Math.abs(childItems - anchorTextChildItems), evenlyDivisible(children, counter.textAnchors()), evenlyDivisible(childItems, counter.textAnchors()), evenlyDivisible(children, anchorChildren), evenlyDivisible(childItems, anchorChildItems), evenlyDivisible(children, textChildren), evenlyDivisible(childItems, textChildItems), evenlyDivisible(children, anchorTextChildren), evenlyDivisible(childItems, anchorTextChildItems), smallestMod(children, counter.textAnchors()), smallestMod(childItems, counter.textAnchors()), smallestMod(children, anchorChildren), smallestMod(childItems, anchorChildItems), smallestMod(children, textChildren), smallestMod(childItems, textChildItems), smallestMod(children, anchorTextChildren), smallestMod(childItems, anchorTextChildItems), Math.abs(anchorChildren - anchorChildItems), Math.abs(textChildren - textChildItems), Math.abs(anchorTextChildren - anchorTextChildItems), toInt(statAnchorChars.getSkewness()), toInt(statAnchorChars.getStandardDeviation()), toInt(statAnchorChars.getMean()), toInt(statAnchors.getSkewness()), toInt(statAnchors.getStandardDeviation()), toInt(statAnchors.getMean()), toInt(statChars.getSkewness()), toInt(statChars.getStandardDeviation()), toInt(statChars.getMean()), toInt(statDescendants.getSkewness()), toInt(statDescendants.getStandardDeviation()), toInt(statDescendants.getMean()), toInt(statFields.getSkewness()), toInt(statFields.getStandardDeviation()), toInt(statFields.getMean()), toInt(statLevels.getSkewness()), toInt(statLevels.getStandardDeviation()), toInt(statLevels.getMean()), toInt(statLongestField.getSkewness()), toInt(statLongestField.getStandardDeviation()), toInt(statLongestField.getMean()), toInt(statNonAnchorChars.getSkewness()), toInt(statNonAnchorChars.getStandardDeviation()), toInt(statNonAnchorChars.getMean()), toInt(statStrLen.getSkewness()), toInt(statStrLen.getStandardDeviation()), 
toInt(statStrLen.getMean()), toInt(statTextAnchors.getSkewness()), toInt(statTextAnchors.getStandardDeviation()), toInt(statTextAnchors.getMean()), toInt(statItemChars.getSkewness()), toInt(statItemChars.getStandardDeviation()), toInt(statItemChars.getMean()), toInt(statItemAnchorChars.getSkewness()), toInt(statItemAnchorChars.getStandardDeviation()), toInt(statItemAnchorChars.getMean()), }; }
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
/**
 * Tells whether {@code node} is the last non-text child of its parent.
 *
 * <p>Text nodes are ignored when looking for the last child, so trailing whitespace or text
 * after {@code node} does not change the answer.
 *
 * @param node the node to test; must not be null
 * @return {@code true} if {@code node} is the final non-{@link TextNode} child of its parent;
 *     {@code false} otherwise, including when {@code node} has no parent or the parent has
 *     only text children
 */
static boolean isLastSiblingTag(Node node) {
    Node parent = node.parent();
    if (parent == null) {
        // A root/detached node has no siblings; previously this dereferenced null.
        return false;
    }
    List<Node> children = parent.childNodes();
    // Scan from the end: the first non-text node found is the last tag sibling.
    for (int i = children.size() - 1; i >= 0; i--) {
        Node candidate = children.get(i);
        if (!(candidate instanceof TextNode)) {
            return node == candidate;
        }
    }
    // Only text children: previously this indexed an empty list at -1.
    return false;
}
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
/**
 * Documents members from a three-column table row.
 *
 * <p>When a text node's trimmed text is a key of {@code members} and the text sits inside a
 * {@code td} (directly, or up to two levels deeper), and that {@code td} is the first of
 * exactly three {@code td} cells in its row, the content of the third cell becomes the
 * documentation of every declaration registered under that key.
 *
 * @param node the node being visited
 * @param depth the depth of the node in the traversal (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    String text = ((TextNode) node).text().trim();
    if (!members.containsKey(text)) {
        return;
    }

    // Climb at most two extra levels looking for the enclosing td.
    Node parent = node.parent();
    if (parent != null && !"td".equals(parent.nodeName())) {
        parent = parent.parent();
    }
    if (parent != null && !"td".equals(parent.nodeName())) {
        parent = parent.parent();
    }
    if (parent == null || !"td".equals(parent.nodeName())) {
        return;
    }

    Node row = parent.parent();
    if (row == null) {
        // Detached cell: there is no row to inspect.
        return;
    }
    List<Node> tdSiblings = new ArrayList<Node>();
    for (Node n : row.childNodes()) {
        if ("td".equals(n.nodeName())) {
            tdSiblings.add(n);
        }
    }

    // tdSiblings always contains 'parent', so get(0) is safe.
    if (tdSiblings.get(0) == parent && tdSiblings.size() == 3) {
        String s = tdSiblings.get(2).toString();
        // Locate the end of the opening tag and the start of the closing tag instead of
        // assuming the wrapper is exactly "<td>" / "</td>", so cells with attributes do
        // not leak attribute text into the documentation.
        int bodyStart = s.indexOf('>') + 1;
        int bodyEnd = s.lastIndexOf("</");
        String inner = bodyEnd >= bodyStart ? s.substring(bodyStart, bodyEnd) : s;
        String doc = "/** " + DocFiller.removeTags(inner) + " */";
        for (Declaration d : members.get(text)) {
            d.setDocumentation(doc);
        }
        docFiller.countDoc(false);
    }
}
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
@Override public void head(Node node, int depth) { if (node instanceof TextNode) { String text = ((TextNode) node).text().trim(); if (members.containsKey(text)) { Node parent = node.parent(); if (parent != null && !"dt".equals(parent.nodeName())) { parent = parent.parent(); }// w w w .j a v a2s . co m if (parent != null && !"dt".equals(parent.nodeName())) { parent = parent.parent(); } if (parent != null && "dt".equals(parent.nodeName())) { List<Node> siblings = parent.parent().childNodes(); List<Node> dlSiblings = new ArrayList<Node>(); siblings.forEach(n -> { if ("dt".equals(n.nodeName()) || "dd".equals(n.nodeName())) dlSiblings.add(n); }); for (int i = 0; i < dlSiblings.size(); i++) { if (dlSiblings.get(i) == parent) { if (i < dlSiblings.size() - 1 && "dd".equals(dlSiblings.get(i + 1).nodeName())) { String s = dlSiblings.get(i + 1).toString(); String doc = "/** " + DocFiller.removeTags(s.substring(4, s.length() - 5)) + " */"; for (Declaration d : members.get(text)) { d.setDocumentation(doc); } docFiller.countDoc(false); } break; } } } } } }
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
/**
 * Extracts the type description that follows the element whose id is {@code Quick_Links}
 * (compared case-insensitively) and stores it as the documentation of
 * {@code typeDeclaration}. Only the first matching element is processed.
 *
 * <p>Starting from the matched element, the visitor climbs while the current node is the
 * last tag among its siblings, then appends the outer HTML of the following siblings up to
 * the next {@code h2}/{@code h3}. If the text collected so far is blank, a second run of
 * siblings is appended: from the heading where the first run stopped, or again from the
 * start node when the first run reached the end of the siblings.
 *
 * @param node the node being visited
 * @param depth the depth of the node in the traversal (unused)
 */
@Override
public void head(Node node, int depth) {
    if (found) {
        return;
    }
    if (!"Quick_Links".equalsIgnoreCase(node.attr("id"))) {
        return;
    }
    found = true;

    // Stop climbing at a parentless node: isLastSiblingTag needs a parent to inspect, and
    // without this guard a match that is the last tag at every level would run off the
    // top of the tree.
    Node start = node;
    while (start.parent() != null && DocFiller.isLastSiblingTag(start)) {
        start = start.parent();
    }

    Node n = start;
    while ((n = n.nextSibling()) != null && !("h2".equals(n.nodeName()) || "h3".equals(n.nodeName()))) {
        description.append(n.outerHtml());
    }

    String s = Jsoup.parse(description.toString()).text();
    if ("".equals(s.trim())) {
        // Nothing readable yet: continue past the heading we stopped at, or restart from
        // the start node when the first pass ran out of siblings.
        if (n == null) {
            n = start;
        }
        while ((n = n.nextSibling()) != null && !("h2".equals(n.nodeName()) || "h3".equals(n.nodeName()))) {
            description.append(n.outerHtml());
        }
    }

    typeDeclaration.setDocumentation("/** " + DocFiller.removeTags(description.toString()) + " */");
    docFiller.countDoc(true);
}