List of usage examples for the jsoup method org.jsoup.nodes.Document.select(String cssQuery)
public Elements select(String cssQuery)
From source file:app.data.parse.WebPageUtil.java
public static WebPageInfo parse(String url, Cache<String, WebPageInfo> urlInfoCache) throws IOException { String original = url;//from ww w .j a v a 2 s.co m // hit toutiao.io // fixme http://toutiao.io/shares/640539/url if (original.startsWith("https://toutiao.io/posts/")) { original = original.replace("/posts/", "/k/"); } // check cache WebPageInfo info = urlInfoCache != null ? urlInfoCache.getIfPresent(original) : null; if (info != null) { return info; } else { info = new WebPageInfo(); info.url = original; } // attach url Document doc = requestUrl(info.url); info.url = doc.baseUri(); // or doc.location() // hit gold.xitu.io if (info.url.startsWith("http://gold.xitu.io/entry/")) { Elements origin = doc.select("div[class=ellipsis]"); Elements originLink = origin.select("a[class=share-link]"); info.url = originLink.attr("href"); // reconnect doc = requestUrl(info.url); info.url = doc.baseUri(); // or doc.location() } info.url = smartUri(info.url); // get title Elements metaTitle = doc.select("meta[property=og:title]"); if (metaTitle != null) { info.title = metaTitle.attr("content"); } if (StringUtils.isEmpty(info.title)) { metaTitle = doc.select("meta[property=twitter:title]"); if (metaTitle != null) { info.title = metaTitle.attr("content"); } info.title = StringUtils.isEmpty(info.title) ? 
doc.title() : info.title; } // get desc Elements metaDesc = doc.select("meta[property=og:description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.select("meta[property=twitter:description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.select("meta[name=description]"); if (metaDesc != null) { info.description = metaDesc.attr("content"); } if (StringUtils.isEmpty(info.description)) { metaDesc = doc.body().select("p"); if (metaDesc != null) { for (Element element : metaDesc) { info.description = element.text(); if (info.description != null && info.description.length() >= 20) { break; } } } } } } info.description = ellipsis(info.description, 140, "..."); // cache info if (urlInfoCache != null) { urlInfoCache.put(original, info); } return info; }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void getTestCaseHtmlPath(String path) { Document htmlFile = null; try {// ww w .j a va2s.co m htmlFile = Jsoup.parse(new File(path), "UTF-8"); } catch (IOException e) { System.out.println("Exception in parse Current Run html file" + e.getMessage()); } for (Element table : htmlFile.select("table[id=tableStyle]")) { Elements row1 = table.select("tr"); for (int j = 0; j < row1.size(); j++) { Element tds1 = row1.get(j); Elements tds = tds1.select("td"); for (int i = 0; i < tds.size(); i++) { Element link = tds.get(i); Elements href = link.select("a"); if (i == 0) { if (href.size() > 0) { String[] temp_ar = href.get(0).text("href").toString().split("\""); getTestDescription(temp_ar[1]); break; } } } } } }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void getTestDescription(String path) { Document htmlFile = null; try {/*w w w . j a v a 2 s . co m*/ htmlFile = Jsoup.parse(new File(basepath + path), "UTF-8"); } catch (IOException e) { System.out.println("Exception in parse Current Run html file" + e.getMessage()); } for (Element table : htmlFile.select("table[id=tableStyle]")) { Elements row1 = table.select("tr"); for (int j = 0; j < row1.size(); j++) { Element tds1 = row1.get(j); Elements tds = tds1.select("td"); for (int i = 0; i < tds.size(); i++) { Element link = tds.get(i); String link_temp = link.toString(); if (i == 1) { // System.out.println("data" + link_temp); if (!TestCaseDesMap.containsKey(path)) { TestCaseDesMap.put(path, Jsoup.parse(link_temp).text()); } break; } } } } }
From source file:com.itcs.commons.email.EmailAutoconfigClient.java
private static void extractOutgoingServerSettings(Document doc, Map<String, String> settings) { for (Element element : doc.select("outgoingServer")) { // System.out.println("element.attr(\"type\"):"+element.attr("type")); if (element.attr("type").equals("smtp")) { // System.out.println("element.select(\"hostname\"):" + element.select("hostname").text()); settings.put(EnumEmailSettingKeys.SMTP_SERVER.getKey(), element.select("hostname").text()); // System.out.println("element.select(\"port\"):" + element.select("port").text()); settings.put(EnumEmailSettingKeys.SMTP_PORT.getKey(), element.select("port").text()); // System.out.println("element.select(\"socketType\"):" + element.select("socketType").text()); settings.put(EnumEmailSettingKeys.SMTP_SSL_ENABLED.getKey(), element.select("socketType").text().equals("SSL") ? "true" : "false"); settings.put(EnumEmailSettingKeys.TRANSPORT_TLS.getKey(), element.select("socketType").text().equals("STARTTLS") ? "true" : "false"); }/*from ww w . java2 s . c o m*/ } }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void replaceDetailsTable(String path) throws IOException { File source = new File(path); Document report = null; try {/* w w w . jav a2s . co m*/ report = Jsoup.parse(source, "UTF-8"); } catch (IOException e) { System.out.println("Unable to open [" + source.getAbsolutePath() + "] for parsing!"); } Elements dom = report.children(); Elements tds = report.select("table[id=tableStyle] td"); // select the tds from your table String temp_key = ""; for (Element td : tds) { // loop through them String[] temp_ar = td.toString().split("\""); String Key = temp_ar[1]; String Status = ""; if (td.toString().contains("pass.png")) { Status = "pass"; } if (td.toString().contains("fail.png")) { Status = "fail"; } if (td.toString().contains("skip.png")) { Status = "skip"; } if (TestCaseDesMap.containsKey(temp_key) && Status.length() > 1) { TestcaseStatusMap.put(temp_key, Status); temp_key = ""; } if (td.text().contains("Test Method")) { // found the one you want String TestcaseDes; if (!TestCaseDesMap.containsKey(Key)) { TestcaseDes = " --------- "; TestCaseDesMap.put(Key, TestcaseDes); temp_key = Key; } else { TestcaseDes = TestCaseDesMap.get(Key); temp_key = Key; // TestcaseStatusMap.put(Key, Status); } td.text(TestcaseDes); // Replace with your text } } Elements ths = report.select("table[id=tableStyle] th"); // select the tds from your table for (Element th : ths) { // loop through them if (th.text().contains("Method Type")) { // found the one you want th.text("TestCase Description"); } if (th.text().contains("Test Case Name")) { // found the one you want th.text("Testng Method"); } } if (!source.canWrite()) { System.out.println("Can't write this file!");//Just check if the file is writable or not } BufferedWriter bw = new BufferedWriter(new FileWriter(source)); bw.write(dom.toString()); //toString will give all the elements as a big string bw.close(); //Close to apply the changes // genarateFailureReport(new 
File("C:\\Users\\yanamalp\\Desktop\\Gen_jelly\\HTML_Design_Files\\CSS\\HtmlReport.html"), "c:\\"); }
From source file:net.intelliant.util.UtilCommon.java
public static String getModifiedHtmlWithAbsoluteImagePath(String html) { if (UtilValidate.isEmpty(html)) { return html; }// ww w . jav a2s. c o m org.jsoup.nodes.Document doc = Jsoup.parse(html); Elements images = doc.select("img[src~=(?i)\\.(jpg|jpeg|png|gif)]"); if (images != null && images.size() > 0) { String srcAttributeValue = ""; StringBuilder finalLocation = new StringBuilder(); Set<String> imageSrc = new HashSet<String>(); for (Element image : images) { srcAttributeValue = image.attr("src"); if (!imageSrc.contains(srcAttributeValue)) { int separatorIndex = srcAttributeValue.lastIndexOf("/"); if (separatorIndex == -1) { separatorIndex = srcAttributeValue .lastIndexOf("\\"); /** just in case some one plays with html source. */ } String outputFileName = null; if (separatorIndex != -1) { String originalFileName = srcAttributeValue.substring(separatorIndex + 1); outputFileName = originalFileName; } finalLocation = new StringBuilder(imageUploadLocation); finalLocation = finalLocation.append(outputFileName); imageSrc.add(srcAttributeValue); html = StringUtil.replaceString(html, srcAttributeValue, finalLocation.toString()); } } } return html; }
From source file:com.itcs.commons.email.EmailAutoconfigClient.java
private static boolean existsIncommingType(String emailAddress, String type) { if (existsAutoconfigSettings(emailAddress)) { try {// ww w . j a v a 2 s . c o m String domain = "gmail.com"; if (!isGmailAddress(emailAddress)) { domain = extractDomain(emailAddress); } Document doc = settingsCache.get(domain); for (Element element : doc.select("incomingServer")) { if (element.attr("type").equals(type)) { return true; } } } catch (Exception ex) { // ex.printStackTrace(); } } return false; }
From source file:com.itcs.commons.email.EmailAutoconfigClient.java
private static void extractIncommingServerSettings(Document doc, Map<String, String> settings, String type) { for (Element element : doc.select("incomingServer")) { // System.out.println("element.attr(\"type\"):"+element.attr("type")); if (element.attr("type").equals(type)) { // System.out.println("element.select(\"hostname\"):" + element.select("hostname").text()); settings.put(EnumEmailSettingKeys.INBOUND_SERVER.getKey(), element.select("hostname").text()); // System.out.println("element.select(\"port\"):" + element.select("port").text()); settings.put(EnumEmailSettingKeys.INBOUND_PORT.getKey(), element.select("port").text()); // System.out.println("element.select(\"socketType\"):" + element.select("socketType").text()); settings.put(EnumEmailSettingKeys.INBOUND_SSL_ENABLED.getKey(), element.select("socketType").text().trim().equals("SSL") ? "true" : "false"); }/*from www . j a v a 2 s .co m*/ } }
From source file:org.wso2.carbon.appmgt.sampledeployer.main.ApplicationPublisher.java
private static void accsesWebPages(String webContext, String trackingCode, int hitCount) { String loginHtmlPage = null;// w w w.j a va 2 s. c o m String webAppurl = "http://" + ipAddress + ":8280" + webContext + "/1.0.0/"; String responceHtml = null; try { loginHtmlPage = httpHandler.getHtml(webAppurl); Document html = Jsoup.parse(loginHtmlPage); Element something = html.select("input[name=sessionDataKey]").first(); String sessionDataKey = something.val(); responceHtml = httpHandler.doPostHttps(backEndUrl + "/commonauth", "username=admin&password=admin&sessionDataKey=" + sessionDataKey, "none", "application/x-www-form-urlencoded; charset=UTF-8"); Document postHtml = Jsoup.parse(responceHtml); Element postHTMLResponse = postHtml.select("input[name=SAMLResponse]").first(); String samlResponse = postHTMLResponse.val(); String appmSamlSsoTokenId = httpHandler.doPostHttp(webAppurl, "SAMLResponse=" + URLEncoder.encode(samlResponse, "UTF-8"), "appmSamlSsoTokenId", "application/x-www-form-urlencoded; charset=UTF-8"); for (int i = 0; i < hitCount; i++) { if (webContext.equals("/notifi")) { if (i == hitCount / 5) { webAppurl += "member/"; } else if (i == hitCount / 2) { webAppurl = appendPageToUrl("admin", webAppurl, false); } } else if (webContext.equals("/travelBooking")) { if (i == hitCount / 5) { webAppurl = appendPageToUrl("booking-step1.jsp", webAppurl, true); } else if (i == hitCount / 2) { webAppurl = appendPageToUrl("booking-step2.jsp", webAppurl, false); } } httpHandler.doGet("http://" + ipAddress + ":8280/statistics/", trackingCode, appmSamlSsoTokenId, webAppurl); log.info("Web Page : " + webAppurl + " Hit count : " + i); try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } }
From source file:dsll.pinterest.crawler.Reduce.java
/**
 * Refreshes a stored pin: downloads the pin page, merges its board and its related pins
 * into the stored lists, and writes both lists back with a {@code $set} update.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>Related pins are now really de-duplicated. The old check compared the stored
 *       {@code String} values with the raw object returned by {@code get("src")}, which
 *       never matched; both sides are now read with {@code getString("src")}, the same way
 *       board names are compared.</li>
 *   <li>The query cursor is closed after the stored pin has been read.</li>
 *   <li>Missing page sections are handled with explicit null checks instead of catching
 *       the resulting exception: a missing board header yields empty board fields (as
 *       before), and a missing sidebar or related-pins wrapper no longer aborts the
 *       update.</li>
 * </ul>
 *
 * @param url            pin page URL; the text after {@code /pin/} is used as the pin ID
 * @param pinsCollection collection holding the stored pins, looked up by the {@code ID} field
 * @return a short status message naming the updated pin
 * @throws JSONException if stored or scraped data cannot be handled as JSON
 * @throws IOException   if the pin page cannot be downloaded
 */
private static Text updatePinContent(String url, DBCollection pinsCollection)
        throws JSONException, IOException {
    String id = url.split("/pin/")[1];

    DBObject oldPin;
    DBCursor cursor = pinsCollection.find(new BasicDBObject("ID", id));
    try {
        oldPin = cursor.next();
    } finally {
        cursor.close();
    }
    JSONArray oldBoards = new JSONArray(oldPin.get("board").toString());
    JSONArray oldRltPin = new JSONArray(oldPin.get("related_pins").toString());

    Document doc = Jsoup.connect(url).get();

    // Board of the pin, read from the sidebar header.
    Element sidebar = doc.select("div[class=Module CloseupSidebar]").first();
    Element boardEle = sidebar == null ? null : sidebar.select("div[class=boardHeader]").first();
    String boardName = "";
    String boardSrc = "";
    if (boardEle != null) {
        boardName = boardEle.select("h3[class=title]").text().trim();
        boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
    }
    JSONObject b = new JSONObject();
    b.append("name", boardName);
    b.append("src", boardSrc);
    JSONArray board = new JSONArray();
    board.put(b);

    // Related pins listed at the bottom of the page.
    Element relatedWrapper = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();
    JSONArray relatedPins = new JSONArray();
    if (relatedWrapper != null) {
        for (Element relatedPinsCont : relatedWrapper.select("div[class=pinWrapper]")) {
            JSONObject relatedPin = new JSONObject();
            relatedPin.append("src", "https://www.pinterest.com"
                    + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            relatedPins.put(relatedPin);
        }
    }

    // Merge boards: add only boards whose name is not stored yet.
    List<String> knownBoardNames = new ArrayList<String>();
    for (int i = 0; i < oldBoards.length(); i++) {
        knownBoardNames.add(oldBoards.getJSONObject(i).getString("name"));
    }
    for (int i = 0; i < board.length(); i++) {
        JSONObject candidate = board.getJSONObject(i);
        String name = candidate.getString("name");
        if (!knownBoardNames.contains(name)) {
            knownBoardNames.add(name);
            oldBoards.put(candidate);
        }
    }

    // Merge related pins: compare by the same string form used for the stored entries.
    List<String> knownPinSources = new ArrayList<String>();
    for (int i = 0; i < oldRltPin.length(); i++) {
        knownPinSources.add(oldRltPin.getJSONObject(i).getString("src"));
    }
    for (int i = 0; i < relatedPins.length(); i++) {
        JSONObject candidate = relatedPins.getJSONObject(i);
        String src = candidate.getString("src");
        if (!knownPinSources.contains(src)) {
            knownPinSources.add(src);
            oldRltPin.put(candidate);
        }
    }

    BasicDBObject newAttr = new BasicDBObject();
    newAttr.append("board", oldBoards);
    newAttr.append("related_pins", oldRltPin);
    BasicDBObject update = new BasicDBObject().append("$set", newAttr);
    pinsCollection.update(new BasicDBObject("ID", id), update);

    return new Text("Pin " + id + " updated.");
}