List of usage examples for org.jsoup.nodes.Node.nodeName()
/**
 * Returns the name of this node as a string.
 *
 * <p>The usage examples on this page compare the result against values such as
 * {@code "#comment"}, {@code "#text"}, {@code "br"}, {@code "tr"}, {@code "span"}
 * and {@code "a"}.
 *
 * @return the node name
 */
public abstract String nodeName();
From source file:Main.java
private static void removeComments(Node node) { for (int i = 0; i < node.childNodes().size();) { Node child = node.childNode(i); if (child.nodeName().equals("#comment")) child.remove();//from www . j av a 2s. co m else { removeComments(child); i++; } } }
From source file:Main.java
/**
 * Concatenates the text found beneath {@code node}, emitting a line feed for every
 * {@code br} child encountered.
 *
 * <p>Children are visited in order: a child named {@code br} (case-insensitively)
 * contributes {@code "\n"}, a {@link TextNode} contributes its {@code toString()}
 * value, and any other child is descended into recursively.
 *
 * @param node the node whose children are rendered; may be {@code null}
 * @return the collected text, or an empty string when {@code node} is {@code null}
 */
public static String GetNodeTextWithNewLines(Node node) {
    if (node == null) {
        return "";
    }
    // One shared builder for the whole traversal avoids re-copying the partial
    // result at every level, which repeated String concatenation would do.
    StringBuilder text = new StringBuilder();
    appendNodeText(node, text);
    return text.toString();
}

/** Appends the text of {@code node}'s children to {@code out}, recursing into non-text children. */
private static void appendNodeText(Node node, StringBuilder out) {
    for (Node childNode : node.childNodes()) {
        // equalsIgnoreCase is locale-independent, unlike toLowerCase() with the default locale.
        if (childNode.nodeName().equalsIgnoreCase("br")) {
            out.append('\n');
        } else if (childNode instanceof TextNode) {
            // append(Object) uses toString(), exactly as string concatenation did.
            out.append(childNode);
        } else {
            appendNodeText(childNode, out);
        }
    }
}
From source file:damo.three.ie.util.HtmlUtilities.java
/**
 * Strips all comment nodes from the tree below {@code node}.
 *
 * @param node the node whose descendants are scanned for {@code "#comment"} children
 */
private static void removeComments(Node node) {
    for (int pos = 0; pos < node.childNodes().size();) {
        Node candidate = node.childNode(pos);
        boolean isComment = candidate.nodeName().equals("#comment");
        if (!isComment) {
            // Keep this child, clean its own subtree, then move to the next sibling.
            removeComments(candidate);
            pos++;
        } else {
            // Removal shifts the remaining siblings left, so pos is deliberately not advanced.
            candidate.remove();
        }
    }
}
From source file:org.brnvrn.Main.java
/** * Parse the HTML containing a category table and the interleaved comments ... *//* w w w . ja v a 2 s .c o m*/ private static void parseCategory(List<Tool> tools, Element tool_div, String category, boolean obsolete) { Tool tool = new Tool(obsolete); for (Node child : tool_div.select("tbody").first().childNodes()) { switch (child.nodeName()) { case "#comment": parseComment(tool, (Comment) child); break; case "tr": Element tr = (Element) child; if (tr.select("th").size() > 0) // Skip headings break; tool.setCategory(category); if (!parseTrTool(tool, tr)) System.out.println(" Could not parse: " + tr.outerHtml()); tools.add(tool); tool = new Tool(obsolete); break; } } }
From source file:com.bibisco.manager.TextEditorManager.java
private static void parseNode(HtmlParsingResult pHtmlParsingResult, Node pNode, boolean pBlnExcludeSpellCheck) { mLog.debug("Start parseNode(HtmlParsingResult, Node, boolean): ", pNode.nodeName()); if ("#text".equals(pNode.nodeName())) { parseTextNode(pHtmlParsingResult, pNode); } else if ("spellerror".equals(pNode.nodeName()) && pBlnExcludeSpellCheck) { // Do nothing } else if ("span".equals(pNode.nodeName()) && pNode.attr("style").equals("display: none;")) { // Do nothing } else {/* ww w . j a v a2 s . co m*/ if ("ul".equals(pNode.nodeName())) { pHtmlParsingResult.ulOpen = true; } if ("ol".equals(pNode.nodeName())) { pHtmlParsingResult.olOpen = true; } if ("li".equals(pNode.nodeName())) { if (pHtmlParsingResult.ulOpen) { pHtmlParsingResult.characterCount += 1; } else if (pHtmlParsingResult.olOpen) { pHtmlParsingResult.characterCount += 1; pHtmlParsingResult.olLiPosition += 1; pHtmlParsingResult.characterCount += String.valueOf(pHtmlParsingResult.olLiPosition).length(); } } for (Node lNode : pNode.childNodes()) { parseNode(pHtmlParsingResult, lNode, pBlnExcludeSpellCheck); } if ("ul".equals(pNode.nodeName())) { pHtmlParsingResult.ulOpen = false; } if ("ol".equals(pNode.nodeName())) { pHtmlParsingResult.olOpen = false; pHtmlParsingResult.olLiPosition = 0; } } mLog.debug("End parseNode(HtmlParsingResult, Node, boolean)"); }
From source file:com.zacwolf.commons.email.Email.java
public static void removeComments(org.jsoup.nodes.Node node) { for (int i = 0; i < node.childNodes().size(); i++) { org.jsoup.nodes.Node child = node.childNode(i); if (child.nodeName().equals("#comment")) child.remove();//ww w . ja va2s .c o m else removeComments(child); } }
From source file:org.coronastreet.gpxconverter.GarminForm.java
private static String findFlowKey(Node node) { String key = null;/*from w w w. j a v a 2 s . c o m*/ for (int i = 0; i < node.childNodes().size();) { Node child = node.childNode(i); if (child.nodeName().equals("#comment")) { //System.out.println(child.toString()); String flowKeyPattern = "\\<\\!-- flowExecutionKey\\: \\[(e1s1)\\] --\\>"; key = child.toString().replaceAll(flowKeyPattern, "$1").trim(); break; } else { findFlowKey(child); i++; } } return key; }
From source file:com.sfs.DataFilter.java
/**
 * Deletes all comment nodes from the subtree rooted at the given node.
 *
 * @param node the node whose descendants are cleaned; children named
 *             {@code "#comment"} are removed, other children are processed recursively
 */
private static void removeComments(Node node) {
    int cursor = 0;
    while (cursor < node.childNodes().size()) {
        final Node current = node.childNode(cursor);
        final boolean comment = current.nodeName().equals("#comment");
        if (comment) {
            current.remove();
        } else {
            removeComments(current);
        }
        // A removal pulls the following sibling into this slot, so only step
        // forward when the current child was kept.
        cursor += comment ? 0 : 1;
    }
}
From source file:com.screenslicer.common.CommonUtil.java
private static Element sanitize(Document doc, final boolean ascii) { if (ascii) {// w ww .j a v a 2s . c o m doc.outputSettings().charset("ascii"); } else { doc.outputSettings().charset("utf-8"); } doc.traverse(new NodeVisitor() { @Override public void tail(Node n, int d) { } @Override public void head(Node n, int d) { try { if (n.nodeName().equals("#text") && !CommonUtil.isEmpty(n.outerHtml())) { ((TextNode) n).text(HtmlCoder.decode(n.toString())); } } catch (Throwable t) { Log.exception(t); } } }); return doc; }
From source file:com.screenslicer.core.util.BrowserUtil.java
/**
 * Parses the browser's current page into an element tree and annotates its nodes
 * with visibility and URL-filter markers.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>When {@code init} is true, a script is run in the browser that cleans
 *       previous marker classes and tags every element with a per-call node marker
 *       (and a hidden marker where {@code isVisible} reports false).</li>
 *   <li>The page source is parsed and its {@code body} taken as the result.</li>
 *   <li>Every non-text, non-empty node is passed to {@code NodeUtil.markVisible}.</li>
 *   <li>If any of {@code whitelist}, {@code patterns} or {@code urlNodes} is
 *       non-empty, nodes whose URL is reported as filtered are passed to
 *       {@code NodeUtil.markFiltered}.</li>
 *   <li>When {@code WebApp.DEBUG} is set, the resulting HTML is dumped to a file.</li>
 * </ol>
 *
 * @param browser    browser whose current page is read (and scripted when {@code init})
 * @param init       whether to run the marker-initialisation script first
 * @param whitelist  passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param patterns   passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param urlNodes   passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param transforms passed through to {@code UrlUtil.isUrlFiltered}
 * @return the annotated body element of the parsed page
 * @throws ActionFailed wrapping any throwable other than {@code Browser.Retry} and
 *         {@code Browser.Fatal}, which are rethrown unchanged
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            // Advance the shared counter under its lock, wrapping to 0 at
            // Integer.MAX_VALUE, and keep a private copy for this call's script.
            int myStartId;
            synchronized (startIdLock) {
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                myStartId = startId;
            }
            // Script, first loop: strip the hidden/filtered marker classes from every
            // element under document.body and collapse whitespace in className.
            // Second loop: add a NODE_MARKER + myStartId + "_" + index class to elements
            // that do not already match NODE_MARKER<digits>_<digits>, and add
            // HIDDEN_MARKER to elements for which isVisible(...) is false.
            // NOTE(review): `isVisible` is concatenated into the script between the two
            // loops; presumably it holds the JavaScript source of the isVisible
            // function called below - confirm at its declaration.
            browser.executeScript(" var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + " if(all[i].className && typeof all[i].className == 'string'){"
                    + " all[i].className=all[i].className.replace(/" + HIDDEN_MARKER + "/g,'').replace(/"
                    + FILTERED_MARKER + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                    + "/g,'').replace(/\\s+/g,' ').trim();" + " }" + "}" + isVisible
                    + "for(var j = 0; j < all.length; j++){" + " if(!all[j].className.match(/" + NODE_MARKER
                    + "\\d+_\\d+/g)){" + " all[j].className += ' " + NODE_MARKER + myStartId + "_'+j+' ';"
                    + " }" + " if(!isVisible(all[j])){" + " all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + " }" + "}");
        }
        String url = browser.getCurrentUrl();
        // The URL object is discarded; constructing it presumably serves only to
        // validate the string. Any exception it throws ends up wrapped in
        // ActionFailed by the catch-all below.
        new URL(url);
        Element element = CommonUtil.parse(browser.getPageSource(), url, false).body();
        // Pass 1: mark every non-text node that NodeUtil does not consider empty.
        element.traverse(new NodeVisitor() {
            @Override
            public void tail(Node node, int depth) {
            }

            @Override
            public void head(Node node, int depth) {
                if (!node.nodeName().equals("#text") && !NodeUtil.isEmpty(node)) {
                    NodeUtil.markVisible(node);
                }
            }
        });
        // Pass 2 runs only when at least one filter input is non-empty.
        if ((whitelist != null && whitelist.length > 0) || (patterns != null && patterns.length > 0)
                || (urlNodes != null && urlNodes.length > 0)) {
            element.traverse(new NodeVisitor() {
                @Override
                public void tail(Node node, int depth) {
                }

                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are checked by their href and marked with flag false.
                        if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), node.attr("href"), node, whitelist,
                                patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, false);
                        }
                    } else {
                        // Other nodes are checked by whatever URL UrlUtil extracts from
                        // their attributes, and marked with flag true.
                        String urlAttr = UrlUtil.urlFromAttr(node);
                        if (!CommonUtil.isEmpty(urlAttr) && UrlUtil.isUrlFiltered(browser.getCurrentUrl(),
                                urlAttr, node, whitelist, patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, true);
                        }
                    }
                }
            });
        }
        if (WebApp.DEBUG) {
            // Debug dump to ./<currentTimeMillis>.log.scrape; a write failure is
            // silently ignored and does not affect the returned element.
            try {
                FileUtils.writeStringToFile(new File("./" + System.currentTimeMillis() + ".log.scrape"),
                        element.outerHtml(), "utf-8");
            } catch (IOException e) {
            }
        }
        return element;
    } catch (Browser.Retry r) {
        // Rethrown as-is so it is not wrapped by the catch-all below.
        throw r;
    } catch (Browser.Fatal f) {
        // Rethrown as-is so it is not wrapped by the catch-all below.
        throw f;
    } catch (Throwable t) {
        throw new ActionFailed(t);
    }
}