Example usage for org.jsoup.nodes Node childNodeSize

List of usage examples for org.jsoup.nodes Node childNodeSize

Introduction

On this page you can find example usages of org.jsoup.nodes.Node.childNodeSize().

Prototype

public abstract int childNodeSize();

Source Link

Document

Get the number of child nodes that this node holds.

Usage

From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java

/**
 * Walks the subtree rooted at {@code root} depth-first, visiting each node before its
 * children, and counts the non-text nodes found at every depth.
 *
 * <p>For each visited node whose name is not {@code #text}, the counter stored in
 * {@code allLevels.get(depth)} under the key {@code nodeName[class=classAttr]} is
 * incremented (or created with the value 1). The walk stops once it has climbed back
 * to {@code root}.
 *
 * @param root node at which the walk starts
 * @return {@code false} when {@code dateCount} is below 2, otherwise the result of
 *         {@code findOnePart(dateCount)}
 */
private boolean findDocumentParts(Node root) {
    Node current = root;
    int depth = 0;

    while (current != null) {
        if (!current.nodeName().equals("#text")) {
            HashMap<String, Integer> level = allLevels.get(depth);
            String key = current.nodeName() + "[class=" + current.attr("class") + "]";
            int occurrences = level.containsKey(key) ? level.get(key) + 1 : 1;
            level.put(key, occurrences);
        }

        if (current.childNodeSize() > 0) {
            // Descend to the first child.
            current = current.childNode(0);
            depth++;
            continue;
        }

        // Leaf reached: climb until some ancestor (or the node itself) has a next sibling.
        while (current.nextSibling() == null && depth > 0) {
            current = current.parentNode();
            depth--;
        }
        if (current == root) {
            break;
        }
        current = current.nextSibling();
    }

    // With zero or one date the parts cannot be determined, so report failure.
    return dateCount >= 2 && findOnePart(dateCount);
}

From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java

/**
 * Walks the subtree rooted at {@code root} depth-first, visiting each node before its
 * children. For every visited node the deepest level seen so far is recorded in
 * {@code maxDepth} and the node is passed to {@code analyze}; the walk ends as soon as
 * {@code analyze} returns {@code true} or the walk has climbed back to {@code root}.
 *
 * @param root node at which the walk starts
 */
private void traversePage(Node root) {
    Node current = root;
    int level = 0;

    while (current != null) {
        if (level > maxDepth) {
            maxDepth = level;
        }

        if (analyze(current)) {
            return;
        }

        if (current.childNodeSize() > 0) {
            // Descend to the first child.
            current = current.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: climb until a next sibling exists or the start level is reached.
        while (current.nextSibling() == null && level > 0) {
            current = current.parentNode();
            level--;
        }
        if (current == root) {
            return;
        }
        current = current.nextSibling();
    }
}

From source file:sk.svec.jan.acb.extraction.Finder.java

/**
 * Walks the subtree rooted at {@code root} depth-first and adds every {@code #text}
 * node whose trimmed string form is shorter than 20 characters to
 * {@code nodesToRemove}.
 *
 * @param root node at which the walk starts
 */
private void markBadText(Node root) {
    Node current = root;
    int level = 0;

    while (current != null) {
        // NOTE(review): an earlier comment here spoke of 15 characters, but the
        // threshold in code is 20 - confirm which value is intended.
        boolean isText = current.nodeName().equals("#text");
        if (isText && current.toString().trim().length() < 20) {
            nodesToRemove.add(current);
        }

        if (current.childNodeSize() > 0) {
            // Descend to the first child.
            current = current.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: climb until a next sibling exists or the start level is reached.
        while (current.nextSibling() == null && level > 0) {
            current = current.parentNode();
            level--;
        }
        if (current == root) {
            return;
        }
        current = current.nextSibling();
    }
}

From source file:sk.svec.jan.acb.extraction.Finder.java

/**
 * Walks the subtree rooted at {@code root} depth-first and calls {@code remove()} on
 * the first visited node for which {@code equals(nodeToRemove)} is true.
 *
 * @param root         node at which the walk starts
 * @param nodeToRemove node to look for
 * @return {@code root}, whether or not a matching node was found
 */
public Node removeNodes(Node root, Node nodeToRemove) {
    Node current = root;
    int level = 0;

    while (current != null) {
        if (current.equals(nodeToRemove)) {
            current.remove();
            return root;
        }

        if (current.childNodeSize() > 0) {
            // Descend to the first child.
            current = current.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: climb until a next sibling exists or the start level is reached.
        while (current.nextSibling() == null && level > 0) {
            current = current.parentNode();
            level--;
        }
        if (current == root) {
            return root;
        }
        current = current.nextSibling();
    }
    return root;
}

From source file:sk.svec.jan.acb.extraction.Finder.java

/**
 * Walks the subtree rooted at {@code root} depth-first, visiting each node before its
 * children, and passes every visited node to {@code analyze}. The walk ends as soon
 * as {@code analyze} returns {@code true} or the walk has climbed back to
 * {@code root}.
 *
 * @param root node at which the walk starts
 */
public void traversePage(Node root) {
    Node current = root;
    int level = 0;

    while (current != null) {
        if (analyze(current)) {
            return;
        }

        if (current.childNodeSize() > 0) {
            // Descend to the first child.
            current = current.childNode(0);
            level++;
            continue;
        }

        // Leaf reached: climb until a next sibling exists or the start level is reached.
        while (current.nextSibling() == null && level > 0) {
            current = current.parentNode();
            level--;
        }
        if (current == root) {
            return;
        }
        current = current.nextSibling();
    }
}

From source file:sk.svec.jan.acb.extraction.Finder.java

/**
 * Walks the subtree rooted at {@code root} depth-first, visiting each node before its
 * children, and passes the attribute values of each visited node to
 * {@code findAuthorInText} until one call reports a match.
 *
 * <p>The outcome is stored in the {@code foundAuthor} field. If that field is already
 * {@code true} on entry, nothing is examined.
 *
 * @param root node at which the walk starts
 */
public void traversePageFindAuthor(Node root) {
    Node node = root;
    int depth = 0;

    // Once the author is known there is nothing left to look for, so the walk ends.
    while (node != null && !foundAuthor) {
        for (Attribute attribute : node.attributes().asList()) {
            foundAuthor = findAuthorInText(node, attribute.getValue());
            // Stop only on a match. Breaking unconditionally here would leave every
            // attribute after the first one unexamined.
            if (foundAuthor) {
                break;
            }
        }

        if (node.childNodeSize() > 0) {
            // Descend to the first child.
            node = node.childNode(0);
            depth++;
        } else {
            // Leaf reached: climb until a next sibling exists or the start level is reached.
            while (node.nextSibling() == null && depth > 0) {
                node = node.parentNode();
                depth--;
            }

            if (node == root) {
                break;
            }
            node = node.nextSibling();
        }
    }
}

From source file:sk.svec.jan.acb.extraction.Finder.java

/**
 * Inspects one node for the date, author and title that are still missing.
 *
 * <p>Each attribute value of the node is offered to the date search (while
 * {@code foundDate} is false) and to {@code findAuthor} (while {@code foundAuthor} is
 * false). Afterwards, while {@code foundTitle} is false, the node name is offered to
 * {@code findTitle}.
 *
 * @param node node to inspect
 * @return {@code true} once date, author and title have all been found
 */
private boolean analyze(Node node) {
    for (Attribute attribute : node.attributes().asList()) {
        String value = attribute.getValue();

        if (!foundDate) {
            analyzeDateAttribute(node, value);
        }
        if (!foundAuthor) {
            foundAuthor = findAuthor(node, value);
        }
    }

    if (!foundTitle) {
        foundTitle = findTitle(node, node.nodeName());
    }
    return foundDate && foundAuthor && foundTitle;
}

/**
 * Runs the date search for a single attribute value and updates {@code foundDate} and
 * {@code dateScore}; the node is added to {@code nodesToRemove} when a date was found.
 */
private void analyzeDateAttribute(Node node, String value) {
    if (!findDate(node, value)) {
        // No date marker in the attribute: try the attribute value itself.
        foundDate = findDateValue(node, value);
        dateScore = 5;
    } else if (node.childNodeSize() != 0) {
        // Date marker present: read the value from the node's first child.
        foundDate = findDateValue(node, node.childNode(0).toString());
        dateScore = 10;
    }
    // dateScore is assigned whether or not findDateValue succeeded.

    if (foundDate) {
        nodesToRemove.add(node);
    }
}