List of usage examples for org.jsoup.nodes.Node.outerHtml()
public String outerHtml()
From source file:com.screenslicer.common.CommonUtil.java
/**
 * Sanitizes a parsed document in place.
 *
 * <p>The output charset is set to "ascii" or "utf-8", then every node named
 * {@code #text} whose outer HTML is not reported empty by
 * {@code CommonUtil.isEmpty} has its text replaced with
 * {@code HtmlCoder.decode} of the node's string form.
 *
 * @param doc   the document to sanitize; it is mutated and returned
 * @param ascii {@code true} to select the "ascii" output charset,
 *              {@code false} to select "utf-8"
 * @return the same {@code doc} instance that was passed in
 */
private static Element sanitize(Document doc, final boolean ascii) {
    doc.outputSettings().charset(ascii ? "ascii" : "utf-8");

    doc.traverse(new NodeVisitor() {
        @Override
        public void head(Node n, int d) {
            try {
                if (!n.nodeName().equals("#text")) {
                    return;
                }
                if (CommonUtil.isEmpty(n.outerHtml())) {
                    return;
                }
                TextNode textNode = (TextNode) n;
                textNode.text(HtmlCoder.decode(n.toString()));
            } catch (Throwable t) {
                // A failure on one node is logged and the traversal carries on.
                Log.exception(t);
            }
        }

        @Override
        public void tail(Node n, int d) {
            // nothing to do when leaving a node
        }
    });

    return doc;
}
From source file:com.screenslicer.core.scrape.trainer.TrainerVisitorExtractOnce.java
/**
 * Runs one extraction against the training sample at {@code curTrainingData}
 * and prints the outcome to standard output.
 *
 * <p>If the first extracted node is non-null and its outer HTML starts with
 * the expected parent markup for that sample, the elapsed time in
 * milliseconds is printed; otherwise a "Fail: " line naming the sample is
 * printed (with ", null" appended when no node was returned).
 *
 * @param curTrainingData index of the training sample to evaluate
 * @param page            not used by this implementation
 * @return always {@code -1}
 */
@Override
public int visit(int curTrainingData, int page) {
    final long startedAt = System.currentTimeMillis();
    Node winner = Extract.perform(elements.get(curTrainingData), 1, null, null).get(0);
    final long elapsedMillis = System.currentTimeMillis() - startedAt;

    boolean matchesExpected = winner != null
            && winner.outerHtml().startsWith(resultParents.get(curTrainingData));

    if (matchesExpected) {
        System.out.println(elapsedMillis);
    } else {
        String suffix = (winner == null) ? ", null" : "";
        System.out.println("Fail: " + names[curTrainingData] + suffix);
    }
    return -1;
}
From source file:com.screenslicer.core.scrape.trainer.TrainerVisitorProceed.java
/**
 * Checks the "next button" detection for one training sample and prints a
 * pass/fail line to standard output.
 *
 * <p>Samples whose expected value is "unknown" are skipped silently. A sample
 * passes when no node is found and the expected value is "n/a", or when a
 * node is found and its stripped, space-free outer HTML starts with the
 * stripped, space-free expected value. On failure the actual (if any) and
 * expected values are printed.
 *
 * @param curTrainingData index of the training sample to evaluate
 * @return {@code 1} when the sample fails, {@code 0} otherwise (including
 *         skipped samples)
 */
@Override
public int visit(int curTrainingData) {
    if (nextButtons.get(curTrainingData).equals("unknown")) {
        return 0;
    }

    Node next = Proceed.perform(elements.get(curTrainingData), 2).node;

    boolean passed;
    if (next == null) {
        passed = nextButtons.get(curTrainingData).equals("n/a");
    } else {
        passed = CommonUtil.strip(next.outerHtml(), false).replace(" ", "")
                .startsWith(CommonUtil.strip(nextButtons.get(curTrainingData), false).replace(" ", ""));
    }

    if (passed) {
        System.out.println("pass - " + names[curTrainingData]);
        return 0;
    }

    System.out.println("fail - " + names[curTrainingData]);
    if (next != null) {
        System.out.println("Actual--" + CommonUtil.strip(next.outerHtml(), false));
    }
    System.out.println("Expected--" + CommonUtil.strip(nextButtons.get(curTrainingData), false));
    return 1;
}
From source file:com.screenslicer.core.util.Util.java
public static String outerHtml(Node node) { if (htmlCache.containsKey(node)) { return htmlCache.get(node); }//from www .j a v a 2 s.c om String html = node.outerHtml(); if (htmlCache.size() == MAX_HTML_CACHE) { htmlCache.clear(); } htmlCache.put(node, html); return html; }
From source file:sample.ui.mvc.MessageController.java
private String getBidId(Message message) { try {/*from w ww .j a v a 2 s . co m*/ BasicCookieStore cookieStore = new BasicCookieStore(); CloseableHttpClient httpclient = HttpClients.custom().setDefaultCookieStore(cookieStore).build(); doLogin(cookieStore, httpclient, ZHANGDAIYIXIAN); // String bidName = message.getBidName(); // time // String mainUrl = "http://www.wujinsuo.cn:80/index.php"; HttpGet httpget = new HttpGet(mainUrl); httpget.addHeader("Accept", ACCEPT); httpget.addHeader("User-Agent", AGENT); ResponseHandler<String> responseHandler = new ResponseHandler<String>() { public String handleResponse(final HttpResponse response) throws ClientProtocolException, IOException { int status = response.getStatusLine().getStatusCode(); if (status >= 200 && status < 300) { HttpEntity entity = response.getEntity(); return entity != null ? EntityUtils.toString(entity) : null; } else { throw new ClientProtocolException("Unexpected response status: " + status); } } }; String resultString = httpclient.execute(httpget, responseHandler); // parse html Document doc = Jsoup.parse(resultString); Elements links = doc.select("a[href]"); Element aElement = null; for (Element e : links) { List<Node> childNode = e.childNodes(); if (childNode.size() != 1) continue; Node node = childNode.get(0); if ("span".equals(node.nodeName())) { String html = node.outerHtml(); logger.info(html); if (html.contains(bidName)) { // okle aElement = e; } } } if (aElement == null) { // retry return ""; } else { String href = aElement.attr("href"); String bidId = StringUtils.substringAfter(href, "id="); logger.info(bidId); return bidId; } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
/**
 * Visitor callback that builds the documentation text once the element with
 * id "Quick_Links" (case-insensitive) is reached.
 *
 * <p>Starting from that node, it climbs to the parent while
 * {@code DocFiller.isLastSiblingTag} holds, then appends the outer HTML of
 * the following siblings to {@code description} until an {@code h2} or
 * {@code h3} is met or the siblings run out. If the collected HTML contains
 * no visible text, a second pass of the same kind is made, continuing from
 * the heading that stopped the first pass, or from the start node again when
 * the first pass ran off the end. The tag-stripped result is stored as the
 * type declaration's documentation and the doc counter is incremented.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the tree; not used here
 */
@Override
public void head(Node node, int depth) {
    if (found) {
        return;
    }
    String id = node.attr("id");
    if (id == null || !id.equalsIgnoreCase("Quick_Links")) {
        return;
    }
    found = true;

    Node start = node;
    while (DocFiller.isLastSiblingTag(start)) {
        start = start.parent();
    }

    // First pass: siblings after the start node, up to the next h2/h3.
    Node cursor;
    for (cursor = start.nextSibling();
            cursor != null && !"h2".equals(cursor.nodeName()) && !"h3".equals(cursor.nodeName());
            cursor = cursor.nextSibling()) {
        description.append(cursor.outerHtml());
    }

    String visibleText = Jsoup.parse(description.toString()).text();
    if ("".equals(visibleText.trim())) {
        // Second pass: resume after the heading that ended the first pass,
        // or restart from the start node if the first pass ran off the end.
        if (cursor == null) {
            cursor = start;
        }
        for (cursor = cursor.nextSibling();
                cursor != null && !"h2".equals(cursor.nodeName()) && !"h3".equals(cursor.nodeName());
                cursor = cursor.nextSibling()) {
            description.append(cursor.outerHtml());
        }
    }

    typeDeclaration.setDocumentation("/** " + DocFiller.removeTags(description.toString()) + " */");
    docFiller.countDoc(true);
}
From source file:org.symphonyoss.client.util.MlMessageParser.java
/**
 * Walks the node list depth-first and appends to {@code builder} the outer
 * HTML of nodes that come after a text node matching {@code text}.
 *
 * <p>A {@code #text} node matches when its trimmed string form equals
 * {@code text}, ignoring case. Once a match is seen at a given level, every
 * later sibling at that level is appended whole. The flag is passed by value
 * into the recursion, so a match found inside a child's subtree does not
 * carry back up to the parent's remaining siblings.
 *
 * @param text      text content that triggers appending
 * @param builder   receives the collected HTML
 * @param nodesList nodes to scan at this level
 * @param append    {@code true} if appending is already active for this level
 */
public void getHtmlStartingFromText(String text, StringBuilder builder, List<Node> nodesList, boolean append) {
    for (Node current : nodesList) {
        String name = current.nodeName();
        if (append) {
            builder.append(current.outerHtml());
        } else {
            boolean isMatchingText = name.equalsIgnoreCase("#text")
                    && current.toString().trim().equalsIgnoreCase(text);
            if (isMatchingText) {
                append = true;
            }
            getHtmlStartingFromText(text, builder, current.childNodes(), append);
        }
    }
}
From source file:org.symphonyoss.client.util.MlMessageParser.java
/**
 * Walks the node list depth-first and appends to {@code builder} the outer
 * HTML of nodes that come after a node of the given type carrying the given
 * attribute value.
 *
 * <p>A node matches when its name equals {@code nodeType} and its
 * {@code attrib} attribute equals {@code attribValue}, both ignoring case.
 * Once appending is active, text nodes are preceded by a single space unless
 * their HTML is empty or already starts with one, and non-text nodes are
 * followed by a space. The flag is passed by value into the recursion, so a
 * match found inside a child's subtree does not carry back up to the
 * parent's remaining siblings.
 *
 * @param nodeType    node name that triggers appending
 * @param attrib      attribute key the trigger node must have
 * @param attribValue attribute value the trigger node must have
 * @param builder     receives the collected HTML
 * @param nodesList   nodes to scan at this level
 * @param append      {@code true} if appending is already active for this level
 */
private void getHtmlStartingFromNode(String nodeType, String attrib, String attribValue, StringBuilder builder,
        List<Node> nodesList, boolean append) {
    for (Node node : nodesList) {
        String nodeName = node.nodeName();

        if (append) {
            // Render once; the original rendered the node up to three times.
            String html = node.outerHtml();
            boolean isText = nodeName.equalsIgnoreCase("#text");

            // Guard against empty output: charAt(0) on "" would throw
            // StringIndexOutOfBoundsException for a text node that renders
            // as nothing.
            if (isText && !html.isEmpty() && html.charAt(0) != ' ') {
                builder.append(" ");
            }
            builder.append(html);
            if (!isText) {
                builder.append(" ");
            }
            continue;
        }

        if (nodeName.equalsIgnoreCase(nodeType)
                && node.attributes().hasKey(attrib)
                && node.attr(attrib).equalsIgnoreCase(attribValue)) {
            append = true;
        }

        getHtmlStartingFromNode(nodeType, attrib, attribValue, builder, node.childNodes(), append);
    }
}