List of usage examples for org.jsoup.nodes.Element.traverse
public Node traverse(NodeVisitor nodeVisitor)
From source file:com.screenslicer.core.scrape.ProcessPage.java
private static void trim(Element body) { final List<Node> toRemove = new ArrayList<Node>(); body.traverse(new NodeVisitor() { @Override// w w w . j a v a2 s . c om public void tail(Node n, int d) { } @Override public void head(Node node, int d) { if (Util.isHidden(node)) { toRemove.add(node); } } }); for (Node node : toRemove) { node.remove(); } }
From source file:com.screenslicer.core.util.BrowserUtil.java
/**
 * Parses the page currently loaded in {@code browser} and returns its body
 * element, with nodes marked as visible and (optionally) as URL-filtered.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code init} is set, a script is executed in the browser that cleans
 *       and re-applies marker class names on the live DOM.</li>
 *   <li>The page source is parsed with {@code CommonUtil.parse} and every
 *       non-text, non-empty node is passed to {@code NodeUtil.markVisible}.</li>
 *   <li>If any of {@code whitelist}, {@code patterns} or {@code urlNodes} is
 *       non-null and non-empty, nodes whose URL is rejected by
 *       {@code UrlUtil.isUrlFiltered} are passed to
 *       {@code NodeUtil.markFiltered}.</li>
 *   <li>If {@code WebApp.DEBUG} is set, the resulting HTML is written to a
 *       timestamped {@code .log.scrape} file in the working directory.</li>
 * </ol>
 *
 * @param browser    browser whose current page is processed
 * @param init       whether to run the DOM-marking script first
 * @param whitelist  forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param patterns   forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param urlNodes   forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param transforms forwarded to {@code UrlUtil.isUrlFiltered}
 * @return the parsed and annotated body element
 * @throws ActionFailed wrapping any {@code Throwable} other than
 *         {@code Browser.Retry} and {@code Browser.Fatal}, which are rethrown
 *         unchanged
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            // Take the next start id under the lock; the counter wraps to 0
            // once it reaches Integer.MAX_VALUE.
            int myStartId;
            synchronized (startIdLock) {
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                myStartId = startId;
            }
            // Script, first loop: for elements whose className is a string, strip the
            // hidden/filtered/lenient-filtered markers and collapse whitespace.
            // Script, second loop: append NODE_MARKER + myStartId + "_" + index unless a
            // marker of that shape is already present, and append HIDDEN_MARKER when
            // isVisible(...) returns false.
            // `isVisible` is spliced into the script text here; presumably it holds the
            // JavaScript definition of the isVisible function -- confirm.
            // NOTE(review): the second loop calls className.match(...) without the
            // string-type check used in the first loop -- confirm this is safe for
            // elements whose className is not a string.
            browser.executeScript(" var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + " if(all[i].className && typeof all[i].className == 'string'){"
                    + " all[i].className=all[i].className.replace(/" + HIDDEN_MARKER
                    + "/g,'').replace(/" + FILTERED_MARKER + "/g,'').replace(/"
                    + FILTERED_LENIENT_MARKER + "/g,'').replace(/\\s+/g,' ').trim();" + " }" + "}"
                    + isVisible + "for(var j = 0; j < all.length; j++){"
                    + " if(!all[j].className.match(/" + NODE_MARKER + "\\d+_\\d+/g)){"
                    + " all[j].className += ' " + NODE_MARKER + myStartId + "_'+j+' ';" + " }"
                    + " if(!isVisible(all[j])){" + " all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + " }" + "}");
        }
        String url = browser.getCurrentUrl();
        // The URL object is discarded; constructing it only validates the string.
        // A malformed URL throws here and ends up wrapped in ActionFailed below.
        new URL(url);
        Element element = CommonUtil.parse(browser.getPageSource(), url, false).body();
        // Pass 1: mark every non-text node that NodeUtil does not consider empty.
        element.traverse(new NodeVisitor() {
            @Override
            public void tail(Node node, int depth) {
            }

            @Override
            public void head(Node node, int depth) {
                if (!node.nodeName().equals("#text") && !NodeUtil.isEmpty(node)) {
                    NodeUtil.markVisible(node);
                }
            }
        });
        // Pass 2 runs only when at least one filter input was supplied.
        if ((whitelist != null && whitelist.length > 0) || (patterns != null && patterns.length > 0)
                || (urlNodes != null && urlNodes.length > 0)) {
            element.traverse(new NodeVisitor() {
                @Override
                public void tail(Node node, int depth) {
                }

                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are checked by their href attribute.
                        if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), node.attr("href"), node,
                                whitelist, patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, false);
                        }
                    } else {
                        // Other nodes are checked by whatever URL UrlUtil.urlFromAttr finds.
                        // The `true` flag presumably selects the lenient marker
                        // (cf. FILTERED_LENIENT_MARKER) -- confirm against NodeUtil.
                        String urlAttr = UrlUtil.urlFromAttr(node);
                        if (!CommonUtil.isEmpty(urlAttr) && UrlUtil.isUrlFiltered(browser.getCurrentUrl(),
                                urlAttr, node, whitelist, patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, true);
                        }
                    }
                }
            });
        }
        if (WebApp.DEBUG) {
            try {
                FileUtils.writeStringToFile(new File("./" + System.currentTimeMillis() + ".log.scrape"),
                        element.outerHtml(), "utf-8");
            } catch (IOException e) {
                // Ignored: a failed debug dump does not affect the returned element.
            }
        }
        return element;
    } catch (Browser.Retry r) {
        // Propagated unchanged so it is not wrapped by the Throwable handler below.
        throw r;
    } catch (Browser.Fatal f) {
        // Propagated unchanged so it is not wrapped by the Throwable handler below.
        throw f;
    } catch (Throwable t) {
        throw new ActionFailed(t);
    }
}
From source file:com.screenslicer.core.util.Util.java
public static Element markTestElement(Element element) { element.traverse(new NodeVisitor() { @Override//from w ww . jav a 2 s .c o m public void tail(Node node, int level) { } @Override public void head(Node node, int level) { node.attr("class", nodeMarker.matcher(node.attr("class")).replaceAll("")); } }); element.traverse(new NodeVisitor() { int count = 0; @Override public void tail(Node node, int level) { } @Override public void head(Node node, int level) { ++count; node.attr("class", node.attr("class") + " " + NODE_MARKER + count + " "); } }); return element; }
From source file:com.screenslicer.core.util.Util.java
/**
 * Marks up the page currently loaded in {@code driver}, parses it, and returns
 * its body element with nodes marked as visible and (optionally) as
 * URL-filtered.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>A script is executed in the driver that strips existing marker class
 *       names from the live DOM and applies fresh ones.</li>
 *   <li>The page source is parsed and every non-text, non-empty node is passed
 *       to {@code markVisible}.</li>
 *   <li>If {@code whitelist} or {@code patterns} is non-null and non-empty,
 *       nodes whose URL is rejected by {@code isUrlFiltered} are passed to
 *       {@code markFiltered}.</li>
 * </ol>
 *
 * @param driver     web driver whose current page is processed
 * @param whitelist  forwarded to {@code isUrlFiltered}; may be null
 * @param patterns   forwarded to {@code isUrlFiltered}; may be null
 * @param transforms forwarded to {@code isUrlFiltered}
 * @return the parsed and annotated body element
 * @throws ActionFailed wrapping any {@code Exception} raised along the way
 *         (the exception is also passed to {@code Log.exception})
 */
public static Element openElement(final RemoteWebDriver driver, final String[] whitelist,
        final String[] patterns, final UrlTransform[] transforms) throws ActionFailed {
    try {
        // Script, first loop: for elements with a truthy className, strip the node,
        // hidden, filtered and lenient-filtered markers and collapse whitespace.
        // Script, second loop: append NODE_MARKER + index to every element, and
        // append HIDDEN_MARKER when isVisible(...) returns false.
        // `isVisible` is spliced into the script text here; presumably it holds the
        // JavaScript definition of the isVisible function -- confirm.
        driver.executeScript(" var all = document.getElementsByTagName('*');"
                + "for(var i = 0; i < all.length; i++){" + " if(all[i].className){"
                + " all[i].className=all[i].className.replace(/" + NODE_MARKER
                + "\\d+/g,'').replace(/" + HIDDEN_MARKER + "/g,'').replace(/" + FILTERED_MARKER
                + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                + "/g,'').replace(/\\s+/g,' ').trim();" + " }" + "}" + isVisible
                + "for(var j = 0; j < all.length; j++){" + " all[j].className += ' " + NODE_MARKER
                + "'+j+' ';" + " if(!isVisible(all[j])){" + " all[j].className += ' "
                + HIDDEN_MARKER + " ';" + " }" + "}");
        String url = driver.getCurrentUrl();
        // The URL object is discarded; constructing it only validates the string.
        // A malformed URL throws here and ends up wrapped in ActionFailed below.
        new URL(url);
        Element element = parse(driver.getPageSource(), url).body();
        // Pass 1: mark every non-text node that isEmpty does not reject.
        element.traverse(new NodeVisitor() {
            @Override
            public void tail(Node node, int depth) {
            }

            @Override
            public void head(Node node, int depth) {
                if (!node.nodeName().equals("#text") && !isEmpty(node)) {
                    markVisible(node);
                }
            }
        });
        // Pass 2 runs only when at least one filter input was supplied.
        if ((whitelist != null && whitelist.length > 0) || (patterns != null && patterns.length > 0)) {
            element.traverse(new NodeVisitor() {
                @Override
                public void tail(Node node, int depth) {
                }

                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are checked by their href attribute.
                        if (isUrlFiltered(driver.getCurrentUrl(), node.attr("href"), whitelist, patterns,
                                transforms)) {
                            markFiltered(node, false);
                        }
                    } else {
                        // Other nodes are checked by whatever URL Util.urlFromAttr finds.
                        // The `true` flag presumably selects the lenient marker
                        // (cf. FILTERED_LENIENT_MARKER) -- confirm against markFiltered.
                        String urlAttr = Util.urlFromAttr(node);
                        if (!CommonUtil.isEmpty(urlAttr)
                                && isUrlFiltered(driver.getCurrentUrl(), urlAttr, whitelist, patterns,
                                        transforms)) {
                            markFiltered(node, true);
                        }
                    }
                }
            });
        }
        return element;
    } catch (Exception e) {
        // Log first, then surface the failure to the caller with the cause attached.
        Log.exception(e);
        throw new ActionFailed(e);
    }
}
From source file:org.structr.web.common.microformat.MicroformatParser.java
/**
 * Unwraps every element under {@code element} for which {@code isBlock()} is
 * true and whose class-name set is empty after {@code removeEmpty} has been
 * applied to it.
 *
 * <p>Candidates are collected in encounter order during the traversal and
 * unwrapped only afterwards, so the tree is not modified while being walked.
 *
 * @param element root of the subtree to process
 */
private void unwrap(final Element element) {
    final Set<Element> classlessBlocks = new LinkedHashSet<>();

    element.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            final Element candidate = (Element) node;
            if (!candidate.isBlock()) {
                return;
            }
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);
            if (classNames.isEmpty()) {
                classlessBlocks.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    for (final Element target : classlessBlocks) {
        target.unwrap();
    }
}
From source file:org.structr.web.common.microformat.MicroformatParser.java
/**
 * Collects the own text of every element under {@code element} whose
 * class-name set is empty after {@code removeEmpty} has been applied to it.
 *
 * <p>If the traversal yields nothing, the own text of {@code element} itself
 * is used instead, provided it is not blank.
 *
 * @param element root of the subtree to read
 * @return {@code null} when no text was collected, the single {@code String}
 *         when exactly one part was collected, otherwise the {@code List} of
 *         all collected parts
 */
private Object extractChildContent(final Element element) {
    final List<String> texts = new LinkedList<>();

    element.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            final Element current = (Element) node;
            final Set<String> classNames = current.classNames();
            removeEmpty(classNames);
            if (!classNames.isEmpty()) {
                return;
            }
            texts.add(current.ownText());
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    // Fallback: nothing was collected, so try the root element's own text.
    if (texts.isEmpty()) {
        final String rootText = element.ownText();
        if (StringUtils.isNotBlank(rootText)) {
            texts.add(rootText);
        }
    }

    switch (texts.size()) {
        case 0:
            return null;
        case 1:
            return texts.get(0);
        default:
            return texts;
    }
}