Example usage for org.jsoup.nodes Element traverse

List of usage examples for org.jsoup.nodes.Element.traverse

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.traverse.

Prototype

public Node traverse(NodeVisitor nodeVisitor) 

Source Link

Document

Perform a depth-first traversal through this node and its descendants.

Usage

From source file:com.screenslicer.core.scrape.ProcessPage.java

/**
 * Removes every node under {@code body} (the traversal also visits
 * {@code body} itself) for which {@code Util.isHidden} returns true.
 *
 * <p>Matching nodes are collected during the traversal and removed only
 * after it has finished, so the tree is not modified while it is walked.
 *
 * @param body root of the subtree to prune
 */
private static void trim(Element body) {
    final List<Node> hiddenNodes = new ArrayList<Node>();
    final NodeVisitor hiddenCollector = new NodeVisitor() {
        @Override
        public void head(Node candidate, int depth) {
            if (!Util.isHidden(candidate)) {
                return;
            }
            hiddenNodes.add(candidate);
        }

        @Override
        public void tail(Node candidate, int depth) {
            // Nothing to do when leaving a node.
        }
    };
    body.traverse(hiddenCollector);
    for (int i = 0; i < hiddenNodes.size(); i++) {
        hiddenNodes.get(i).remove();
    }
}

From source file:com.screenslicer.core.util.BrowserUtil.java

/**
 * Parses the browser's current page source and returns its body with
 * visibility and URL-filter marks applied.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>When {@code init} is true, a script is run in the browser that strips
 *       the hidden/filtered markers from every element's class name, appends
 *       {@code NODE_MARKER + <start id> + "_" + <index>} to elements that do
 *       not already match the node-marker pattern, and appends
 *       {@code HIDDEN_MARKER} to elements for which the script-side
 *       {@code isVisible} check fails.</li>
 *   <li>The page source is parsed and every non-text node that is not empty
 *       (per {@code NodeUtil.isEmpty}) is passed to
 *       {@code NodeUtil.markVisible}.</li>
 *   <li>When at least one of {@code whitelist}, {@code patterns} or
 *       {@code urlNodes} is non-empty, nodes whose URL is rejected by
 *       {@code UrlUtil.isUrlFiltered} are passed to
 *       {@code NodeUtil.markFiltered}.</li>
 *   <li>When {@code WebApp.DEBUG} is set, the resulting HTML is written to a
 *       timestamped {@code .log.scrape} file in the working directory.</li>
 * </ol>
 *
 * @param browser    browser whose current page is read
 * @param init       whether to run the class-marking script first
 * @param whitelist  passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param patterns   passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param urlNodes   passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param transforms passed through to {@code UrlUtil.isUrlFiltered}
 * @return the marked body element of the parsed page
 * @throws ActionFailed wrapping any {@code Throwable} other than
 *         {@code Browser.Retry} and {@code Browser.Fatal}, which are rethrown as-is
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            final int markerSeed;
            synchronized (startIdLock) {
                // Wrap around instead of overflowing once the counter hits the int maximum.
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                markerSeed = startId;
            }
            // NOTE(review): the second loop calls className.match without the
            // typeof-string guard used in the first loop — confirm that non-string
            // className values cannot reach it.
            // `isVisible` is spliced in as script text; presumably it defines the
            // isVisible(...) function called below — verify against its declaration.
            final String markingScript = "      var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + "  if(all[i].className && typeof all[i].className == 'string'){"
                    + "    all[i].className=all[i].className.replace(/" + HIDDEN_MARKER + "/g,'').replace(/"
                    + FILTERED_MARKER + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                    + "/g,'').replace(/\\s+/g,' ').trim();" + "  }" + "}" + isVisible
                    + "for(var j = 0; j < all.length; j++){" + "  if(!all[j].className.match(/" + NODE_MARKER
                    + "\\d+_\\d+/g)){" + "    all[j].className += ' " + NODE_MARKER + markerSeed + "_'+j+' ';"
                    + "  }" + "  if(!isVisible(all[j])){" + "    all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + "  }" + "}";
            browser.executeScript(markingScript);
        }

        final String pageUrl = browser.getCurrentUrl();
        // Constructed only for validation: a malformed URL throws here and is
        // reported as ActionFailed by the catch-all below.
        new URL(pageUrl);
        final Element pageBody = CommonUtil.parse(browser.getPageSource(), pageUrl, false).body();

        final NodeVisitor visibilityMarker = new NodeVisitor() {
            @Override
            public void head(Node node, int depth) {
                if (node.nodeName().equals("#text") || NodeUtil.isEmpty(node)) {
                    return;
                }
                NodeUtil.markVisible(node);
            }

            @Override
            public void tail(Node node, int depth) {
                // Nothing to do when leaving a node.
            }
        };
        pageBody.traverse(visibilityMarker);

        final boolean hasWhitelist = whitelist != null && whitelist.length > 0;
        final boolean hasPatterns = patterns != null && patterns.length > 0;
        final boolean hasUrlNodes = urlNodes != null && urlNodes.length > 0;
        if (hasWhitelist || hasPatterns || hasUrlNodes) {
            final NodeVisitor urlFilterMarker = new NodeVisitor() {
                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are judged by their href and marked with the flag set to false.
                        final boolean anchorFiltered = UrlUtil.isUrlFiltered(browser.getCurrentUrl(),
                                node.attr("href"), node, whitelist, patterns, urlNodes, transforms);
                        if (anchorFiltered) {
                            NodeUtil.markFiltered(node, false);
                        }
                        return;
                    }
                    // Other nodes are judged by whatever URL UrlUtil.urlFromAttr yields, if any.
                    final String attrUrl = UrlUtil.urlFromAttr(node);
                    if (CommonUtil.isEmpty(attrUrl)) {
                        return;
                    }
                    if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), attrUrl, node, whitelist, patterns,
                            urlNodes, transforms)) {
                        NodeUtil.markFiltered(node, true);
                    }
                }

                @Override
                public void tail(Node node, int depth) {
                    // Nothing to do when leaving a node.
                }
            };
            pageBody.traverse(urlFilterMarker);
        }

        if (WebApp.DEBUG) {
            try {
                final File dumpFile = new File("./" + System.currentTimeMillis() + ".log.scrape");
                FileUtils.writeStringToFile(dumpFile, pageBody.outerHtml(), "utf-8");
            } catch (IOException ignored) {
                // NOTE(review): a failed debug dump is silently dropped — confirm this
                // best-effort behavior is intended.
            }
        }
        return pageBody;
    } catch (Browser.Retry retry) {
        throw retry;
    } catch (Browser.Fatal fatal) {
        throw fatal;
    } catch (Throwable cause) {
        throw new ActionFailed(cause);
    }
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Renumbers the node markers in the {@code class} attribute of every node
 * under {@code element} (the element itself included).
 *
 * <p>A first traversal deletes everything matched by the {@code nodeMarker}
 * pattern from each node's {@code class} value. A second traversal appends
 * {@code " " + NODE_MARKER + n + " "} to each node's {@code class} value,
 * where {@code n} starts at 1 and increases by one per visited node.
 *
 * @param element root of the subtree to re-mark; modified in place
 * @return the same {@code element} instance
 */
public static Element markTestElement(Element element) {
    final NodeVisitor markerStripper = new NodeVisitor() {
        @Override
        public void head(Node node, int level) {
            final String stripped = nodeMarker.matcher(node.attr("class")).replaceAll("");
            node.attr("class", stripped);
        }

        @Override
        public void tail(Node node, int level) {
            // Nothing to do when leaving a node.
        }
    };
    final NodeVisitor markerAssigner = new NodeVisitor() {
        // Running count of visited nodes; the first node receives 1.
        private int visited = 0;

        @Override
        public void head(Node node, int level) {
            visited++;
            final String existing = node.attr("class");
            node.attr("class", existing + " " + NODE_MARKER + visited + " ");
        }

        @Override
        public void tail(Node node, int level) {
            // Nothing to do when leaving a node.
        }
    };

    element.traverse(markerStripper);
    element.traverse(markerAssigner);
    return element;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Marks the driver's live page, parses its source, and returns the body with
 * visibility and URL-filter marks applied.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>A script is run in the driver that strips node, hidden and filtered
 *       markers from every element's class name, then appends
 *       {@code NODE_MARKER + <index>} to every element and
 *       {@code HIDDEN_MARKER} to those failing the script-side
 *       {@code isVisible} check.</li>
 *   <li>The page source is parsed and every non-text node that is not empty
 *       (per {@code isEmpty}) is passed to {@code markVisible}.</li>
 *   <li>When {@code whitelist} or {@code patterns} is non-empty, nodes whose
 *       URL is rejected by {@code isUrlFiltered} are passed to
 *       {@code markFiltered}.</li>
 * </ol>
 *
 * @param driver     driver whose current page is read
 * @param whitelist  passed through to {@code isUrlFiltered}; may be null
 * @param patterns   passed through to {@code isUrlFiltered}; may be null
 * @param transforms passed through to {@code isUrlFiltered}
 * @return the marked body element of the parsed page
 * @throws ActionFailed wrapping any {@code Exception} raised along the way
 *         (the exception is also handed to {@code Log.exception})
 */
public static Element openElement(final RemoteWebDriver driver, final String[] whitelist,
        final String[] patterns, final UrlTransform[] transforms) throws ActionFailed {
    try {
        // `isVisible` is spliced in as script text; presumably it defines the
        // isVisible(...) function called below — verify against its declaration.
        final String markingScript = "      var all = document.getElementsByTagName('*');"
                + "for(var i = 0; i < all.length; i++){" + "  if(all[i].className){"
                + "    all[i].className=all[i].className.replace(/" + NODE_MARKER + "\\d+/g,'').replace(/"
                + HIDDEN_MARKER + "/g,'').replace(/" + FILTERED_MARKER + "/g,'').replace(/"
                + FILTERED_LENIENT_MARKER + "/g,'').replace(/\\s+/g,' ').trim();" + "  }" + "}" + isVisible
                + "for(var j = 0; j < all.length; j++){" + "  all[j].className += ' " + NODE_MARKER + "'+j+' ';"
                + "  if(!isVisible(all[j])){" + "    all[j].className += ' " + HIDDEN_MARKER + " ';" + "  }"
                + "}";
        driver.executeScript(markingScript);

        final String pageUrl = driver.getCurrentUrl();
        // Constructed only for validation: a malformed URL throws here and is
        // reported as ActionFailed by the catch below.
        new URL(pageUrl);
        final Element pageBody = parse(driver.getPageSource(), pageUrl).body();

        final NodeVisitor visibilityMarker = new NodeVisitor() {
            @Override
            public void head(Node node, int depth) {
                if (node.nodeName().equals("#text") || isEmpty(node)) {
                    return;
                }
                markVisible(node);
            }

            @Override
            public void tail(Node node, int depth) {
                // Nothing to do when leaving a node.
            }
        };
        pageBody.traverse(visibilityMarker);

        final boolean hasWhitelist = whitelist != null && whitelist.length > 0;
        final boolean hasPatterns = patterns != null && patterns.length > 0;
        if (hasWhitelist || hasPatterns) {
            final NodeVisitor urlFilterMarker = new NodeVisitor() {
                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are judged by their href and marked with the flag set to false.
                        final boolean anchorFiltered = isUrlFiltered(driver.getCurrentUrl(),
                                node.attr("href"), whitelist, patterns, transforms);
                        if (anchorFiltered) {
                            markFiltered(node, false);
                        }
                        return;
                    }
                    // Other nodes are judged by whatever URL Util.urlFromAttr yields, if any.
                    final String attrUrl = Util.urlFromAttr(node);
                    if (CommonUtil.isEmpty(attrUrl)) {
                        return;
                    }
                    if (isUrlFiltered(driver.getCurrentUrl(), attrUrl, whitelist, patterns, transforms)) {
                        markFiltered(node, true);
                    }
                }

                @Override
                public void tail(Node node, int depth) {
                    // Nothing to do when leaving a node.
                }
            };
            pageBody.traverse(urlFilterMarker);
        }
        return pageBody;
    } catch (Exception failure) {
        Log.exception(failure);
        throw new ActionFailed(failure);
    }
}

From source file:org.structr.web.common.microformat.MicroformatParser.java

/**
 * Unwraps every block-level element under {@code element} (the element
 * itself included in the traversal) whose set of class names is empty after
 * {@code removeEmpty} has been applied to it.
 *
 * <p>Candidates are gathered in encounter order during the traversal and
 * unwrapped afterwards, so the tree is not restructured while it is walked.
 *
 * @param element root of the subtree to process
 */
private void unwrap(final Element element) {

    final Set<Element> classlessBlocks = new LinkedHashSet<>();

    element.traverse(new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {

            if (!(node instanceof Element)) {
                return;
            }

            final Element candidate = (Element) node;
            if (!candidate.isBlock()) {
                return;
            }

            // presumably drops blank entries from the set — verify against removeEmpty
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);

            if (classNames.isEmpty()) {
                classlessBlocks.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    for (final Element block : classlessBlocks) {
        block.unwrap();
    }
}

From source file:org.structr.web.common.microformat.MicroformatParser.java

/**
 * Collects the own text of elements under {@code element} that carry no
 * class names.
 *
 * <p>The traversal visits {@code element} and its descendants; for each
 * element whose class-name set is empty after {@code removeEmpty}, its own
 * text is appended (blank text included). If nothing was collected, the
 * root's own text is used instead, provided it is not blank.
 *
 * @param element root of the subtree to read
 * @return {@code null} when nothing was collected, the single {@code String}
 *         when exactly one part was collected, otherwise the
 *         {@code List<String>} of all parts in traversal order
 */
private Object extractChildContent(final Element element) {

    final List<String> collected = new LinkedList<>();

    element.traverse(new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {

            if (!(node instanceof Element)) {
                return;
            }

            final Element candidate = (Element) node;

            // presumably drops blank entries from the set — verify against removeEmpty
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);

            if (classNames.isEmpty()) {
                collected.add(candidate.ownText());
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    // Fallback: no class-less element was found, so try the root's own text.
    if (collected.isEmpty()) {

        final String rootText = element.ownText();
        if (StringUtils.isNotBlank(rootText)) {
            collected.add(rootText);
        }
    }

    switch (collected.size()) {
        case 0:
            return null;
        case 1:
            return collected.get(0);
        default:
            return collected;
    }
}