Usage examples for org.jsoup.nodes.Node#childNodeSize()
/**
 * Returns the number of child nodes of this node.
 *
 * <p>Declared abstract, so each concrete node type supplies its own implementation.
 * The examples below test the result against zero before calling {@code childNode(0)}.
 *
 * @return the child node count
 */
public abstract int childNodeSize();
From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java
private boolean findDocumentParts(Node root) { Node node = root; int depth = 0; while (node != null) { if (node.nodeName().compareTo("#text") != 0) { HashMap<String, Integer> level = allLevels.get(depth); // System.out.println(depth + " " + allLevels.size()); if (level.containsKey(node.nodeName() + "[class=" + node.attr("class") + "]")) { Integer get = level.get(node.nodeName() + "[class=" + node.attr("class") + "]"); level.put(node.nodeName() + "[class=" + node.attr("class") + "]", get + 1); } else { level.put(node.nodeName() + "[class=" + node.attr("class") + "]", 1); }// www .j a v a2 s . c o m } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } //ak je 0 alebo 1 datum, vratime false, kedze sa to neda zistit if (dateCount < 2) { return false; } else { return findOnePart(dateCount); } }
From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java
private void traversePage(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // if(node.attr("class").compareTo("contribution")==0){ // System.out.println(depth); // } if (maxDepth < depth) { maxDepth = depth;/* w w w. j av a 2 s .co m*/ } boolean analyze = analyze(node); if (analyze) { break; } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
private void markBadText(Node root) { Node node = root; int depth = 0; while (node != null) { //ak sa jedna o text, ktory ma menej ako 15 znakov if (node.nodeName().compareTo("#text") == 0) { if (node.toString().trim().length() < 20) { nodesToRemove.add(node); // System.out.println(node); }/* w w w. j a v a 2 s .c om*/ } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
/**
 * Searches the tree below {@code root} in document order for the first node that
 * {@code equals} {@code nodeToRemove} and calls {@code remove()} on it.
 *
 * @param root         node at which the search starts
 * @param nodeToRemove node to look for
 * @return {@code root}, whether or not a matching node was found
 */
public Node removeNodes(Node root, Node nodeToRemove) {
    int depth = 0;
    Node current = root;

    while (current != null) {
        if (current.equals(nodeToRemove)) {
            current.remove();
            break;
        }

        // Descend to the first child when there is one.
        if (current.childNodeSize() > 0) {
            current = current.childNode(0);
            depth++;
            continue;
        }

        // Leaf: climb until a node with a following sibling is found or we are back at root level.
        while (depth > 0 && current.nextSibling() == null) {
            current = current.parentNode();
            depth--;
        }
        if (current == root) {
            break;
        }
        current = current.nextSibling();
    }

    return root;
}
From source file:sk.svec.jan.acb.extraction.Finder.java
public void traversePage(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // System.out.println(node.attributes()); boolean analyze = analyze(node); if (analyze) { break; }//from w w w . j a v a 2s .c o m if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
public void traversePageFindAuthor(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // System.out.println(node.attributes()); for (Attribute attribute : node.attributes().asList()) { String value = attribute.getValue(); if (!foundAuthor) { foundAuthor = findAuthorInText(node, value); break; }//from www .j a v a2 s . c o m } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
private boolean analyze(Node node) { // System.out.println(node.nodeName()); for (Attribute attribute : node.attributes().asList()) { String key = attribute.getKey(); String value = attribute.getValue(); // System.out.println(" attr:" + key + " value:" + value); if (!foundDate) { boolean foundDateString = findDate(node, value); if (foundDateString) { if (node.childNodeSize() != 0) { String child = node.childNode(0).toString(); foundDate = findDateValue(node, child); dateScore = 10;/*from www. j a v a 2 s. co m*/ } else { } } else { // nodesToRemove.add(node); foundDate = findDateValue(node, value); dateScore = 5; } if (foundDate) { nodesToRemove.add(node); } } if (!foundAuthor) { foundAuthor = findAuthor(node, value); } } if (!foundTitle) { foundTitle = findTitle(node, node.nodeName()); } return foundDate && foundAuthor && foundTitle; }