List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:Main.java
/**
 * Parses a small inline HTML fragment, prints the parsed document, and then
 * prints every element matched by the {@code span#my} selector.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String html = "<!html>" + "<html><body>" + "<span id='my'>" + "<span class=''contentTitle'>Director:</span>"
            + "<span>test</span></span>" + "</body></html>";
    Document doc = Jsoup.parse(html);
    System.out.println(doc.toString());

    Elements matches = doc.select("span#my");
    if (matches == null) {
        return;
    }
    System.out.printf("Found %d Elements\n", matches.size());
    // Looping over an empty result prints nothing, so no separate emptiness check is needed.
    for (Element match : matches) {
        System.out.println(match.toString());
    }
}
From source file:Main.java
/**
 * Fetches {@code http://www.java2s.com} with a browser-like user agent and
 * prints the text of every {@code div} element found on the page.
 *
 * @param args unused
 * @throws Exception if the page cannot be fetched
 */
public static void main(String[] args) throws Exception {
    String userAgent = "Mozilla/5.0 (Windows; U; WindowsNT 5.1;"
            + " en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6";
    Document page = Jsoup.connect("http://www.java2s.com").userAgent(userAgent).get();

    for (Element div : page.select("div")) {
        System.out.println(div.text());
    }
}
From source file:downloadwolkflow.getWorkFlowList.java
/**
 * Walks the listing pages returned by {@code getPageList()}, extracts the
 * detail link of every {@code div.resource_list_item} entry and hands the
 * resulting URL to {@code downloadWorkFlow}.
 *
 * <p>The HTTP client is closed even when an unchecked exception escapes, a
 * result item without a title link is skipped instead of causing a
 * {@link NullPointerException}, and an interrupt stops the crawl with the
 * thread's interrupt flag restored.
 *
 * @param args unused
 */
public static void main(String args[]) {
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        String[] pageList = getPageList();
        System.out.println(pageList.length);
        // NOTE(review): iteration starts at index 1, so pageList[0] is never fetched — confirm this is intended.
        for (int i = 1; i < pageList.length; i++) {
            System.out.println(pageList[i]);
            System.out.println("---------------------------------------------------------------------------");
            HttpGet httpget = new HttpGet(pageList[i]);
            try {
                HttpResponse response = httpclient.execute(httpget);
                String page = EntityUtils.toString(response.getEntity());
                Document mainDoc = Jsoup.parse(page);
                for (Element workflowResult : mainDoc.select("div.resource_list_item")) {
                    Element detailInfo = firstWorkflowLink(workflowResult);
                    if (detailInfo == null) {
                        // Markup did not contain the expected title link; nothing to download for this item.
                        continue;
                    }
                    String detailUrl = "http://www.myexperiment.org" + detailInfo.attr("href") + ".html";
                    System.out.println(detailUrl);
                    downloadWorkFlow(detailUrl, httpclient);
                    // Pause between downloads.
                    Thread.sleep(1000);
                }
            } catch (IOException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
            } catch (InterruptedException ex) {
                Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
                // Restore the flag and stop: continuing would make the next sleep fail immediately.
                Thread.currentThread().interrupt();
                break;
            }
        }
    } catch (IOException ex) {
        // Raised when closing the client fails.
        Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Returns the first {@code a} inside the first {@code p.title.inline} of the
 * first {@code div.main_panel} under {@code workflowResult}, or {@code null}
 * when any of those levels is missing.
 */
private static Element firstWorkflowLink(Element workflowResult) {
    Element panel = workflowResult.select("div.main_panel").first();
    if (panel == null) {
        return null;
    }
    Element title = panel.select("p.title.inline").first();
    if (title == null) {
        return null;
    }
    return title.select("a").first();
}
From source file:zz.pseas.ghost.login.taobao.MTaobaoLogin.java
public static void main(String[] args) throws UnsupportedEncodingException { String tbuserNmae = "TBname"; String tbpassWord = "TBpasssword"; GhostClient iPhone = new GhostClient("utf-8"); String ans = iPhone.get("https://login.m.taobao.com/login.htm"); Document doc = Jsoup.parse(ans); String url = doc.select("form#loginForm").first().attr("action"); String _tb_token = doc.select("input[name=_tb_token_]").first().attr("value"); String sid = doc.select("input[name=sid]").first().attr("value"); System.out.println(_tb_token); System.out.println(sid);//from ww w .j a va 2 s . co m System.out.println(url); HashMap<String, String> map = new HashMap<String, String>(); map.put("TPL_password", tbpassWord); map.put("TPL_username", tbuserNmae); map.put("_tb_token_", _tb_token); map.put("action", "LoginAction"); map.put("event_submit_do_login", "1"); map.put("loginFrom", "WAP_TAOBAO"); map.put("sid", sid); String location = null; while (true) { CommonsPage commonsPage = iPhone.postForPage(url, map); location = commonsPage.getHeader("Location"); String postAns = new String(commonsPage.getContents(), "utf-8"); if (StringUtil.isNotEmpty(location) && StringUtil.isEmpty(postAns)) { break; } String s = Jsoup.parse(postAns).select("img.checkcode-img").first().attr("src"); String imgUrl = "https:" + s; byte[] bytes = iPhone.getBytes(imgUrl); FileUtil.writeFile(bytes, "g:/tbCaptcha.jpg"); String wepCheckId = Jsoup.parse(postAns).select("input[name=wapCheckId]").val(); String captcha = null; map.put("TPL_checkcode", captcha); map.put("wapCheckId", wepCheckId); } iPhone.get(location); String tk = iPhone.getCookieValue("_m_h5_tk"); if (StringUtil.isNotEmpty(tk)) { tk = tk.split("_")[0]; } else { tk = "undefined"; } String url2 = genUrl(tk); String ans1 = iPhone.get(url2); System.out.println(url2); System.out.println(ans1); tk = iPhone.getCookieValue("_m_h5_tk").split("_")[0]; if (StringUtil.isEmpty(tk)) { tk = "undefined"; } System.out.println(tk); url2 = genUrl(tk); iPhone.showCookies(); 
RequestConfig requestConfig = RequestConfig.custom().setProxy(new HttpHost("127.0.0.1", 8888)).build(); HttpUriRequest get = RequestBuilder.get().setConfig(requestConfig) //.addHeader("User-Agent","Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16") .addHeader("Host", "api.m.taobao.com").setUri(url2).build(); ans1 = iPhone.execute(get); System.out.println(ans1); }
From source file:com.alexoree.jenkins.Main.java
public static void main(String[] args) throws Exception { // create Options object Options options = new Options(); options.addOption("t", false, "throttle the downloads, waits 5 seconds in between each d/l"); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("jenkins-sync", options); CommandLineParser parser = new DefaultParser(); CommandLine cmd = parser.parse(options, args); boolean throttle = cmd.hasOption("t"); String plugins = "https://updates.jenkins-ci.org/latest/"; List<String> ps = new ArrayList<String>(); Document doc = Jsoup.connect(plugins).get(); for (Element file : doc.select("td a")) { //System.out.println(file.attr("href")); if (file.attr("href").endsWith(".hpi") || file.attr("href").endsWith(".war")) { ps.add(file.attr("href")); }/*from www . j a v a 2s . c om*/ } File root = new File("."); //https://updates.jenkins-ci.org/latest/AdaptivePlugin.hpi new File("./latest").mkdirs(); //output zip file String zipFile = "jenkinsSync.zip"; // create byte buffer byte[] buffer = new byte[1024]; FileOutputStream fos = new FileOutputStream(zipFile); ZipOutputStream zos = new ZipOutputStream(fos); //download the plugins for (int i = 0; i < ps.size(); i++) { System.out.println("[" + i + "/" + ps.size() + "] downloading " + plugins + ps.get(i)); String outputFile = download(root.getAbsolutePath() + "/latest/" + ps.get(i), plugins + ps.get(i)); FileInputStream fis = new FileInputStream(outputFile); // begin writing a new ZIP entry, positions the stream to the start of the entry data zos.putNextEntry(new ZipEntry(outputFile.replace(root.getAbsolutePath(), "") .replace("updates.jenkins-ci.org/", "").replace("https:/", ""))); int length; while ((length = fis.read(buffer)) > 0) { zos.write(buffer, 0, length); } zos.closeEntry(); fis.close(); if (throttle) Thread.sleep(WAIT); new File(root.getAbsolutePath() + "/latest/" + ps.get(i)).deleteOnExit(); } //download the json metadata plugins = 
"https://updates.jenkins-ci.org/"; ps = new ArrayList<String>(); doc = Jsoup.connect(plugins).get(); for (Element file : doc.select("td a")) { //System.out.println(file.attr("href")); if (file.attr("href").endsWith(".json")) { ps.add(file.attr("href")); } } for (int i = 0; i < ps.size(); i++) { download(root.getAbsolutePath() + "/" + ps.get(i), plugins + ps.get(i)); FileInputStream fis = new FileInputStream(root.getAbsolutePath() + "/" + ps.get(i)); // begin writing a new ZIP entry, positions the stream to the start of the entry data zos.putNextEntry(new ZipEntry(plugins + ps.get(i))); int length; while ((length = fis.read(buffer)) > 0) { zos.write(buffer, 0, length); } zos.closeEntry(); fis.close(); new File(root.getAbsolutePath() + "/" + ps.get(i)).deleteOnExit(); if (throttle) Thread.sleep(WAIT); } // close the ZipOutputStream zos.close(); }
From source file:com.atlbike.etl.service.ClientFormLogin.java
/** * @param args/*from w w w.j a v a2 s .c o m*/ * @throws Exception */ public static void main(String[] args) throws Exception { String authenticityToken = ""; BasicCookieStore cookieStore = new BasicCookieStore(); CloseableHttpClient httpclient = HttpClients.custom().setDefaultCookieStore(cookieStore) .setRedirectStrategy(new LaxRedirectStrategy()).build(); try { HttpGet httpget = new HttpGet("https://atlbike.nationbuilder.com/login"); CloseableHttpResponse response1 = httpclient.execute(httpget); try { HttpEntity entity = response1.getEntity(); System.out.println("Content Length: " + entity.getContentLength()); System.out.println("Login form get: " + response1.getStatusLine()); // EntityUtils.consume(entity); String content = EntityUtils.toString(entity); Document doc = Jsoup.parse(content); Elements metaElements = doc.select("META"); for (Element elem : metaElements) { System.out.println(elem); if (elem.hasAttr("name") && "csrf-token".equals(elem.attr("name"))) { System.out.println("Value: " + elem.attr("content")); authenticityToken = elem.attr("content"); } } System.out.println("Initial set of cookies:"); List<Cookie> cookies = cookieStore.getCookies(); if (cookies.isEmpty()) { System.out.println("None"); } else { for (int i = 0; i < cookies.size(); i++) { System.out.println("- " + cookies.get(i).toString()); } } } finally { response1.close(); } HttpUriRequest login = RequestBuilder.post() .setUri(new URI("https://atlbike.nationbuilder.com/forms/user_sessions")) .addParameter("email_address", "").addParameter("user_session[email]", "email@domain") .addParameter("user_session[password]", "magicCookie") .addParameter("user_session[remember_me]", "1").addParameter("commit", "Sign in with email") .addParameter("authenticity_token", authenticityToken).build(); CloseableHttpResponse response2 = httpclient.execute(login); try { HttpEntity entity = response2.getEntity(); // for (Header h : response2.getAllHeaders()) { // System.out.println(h); // } 
System.out.println("Content Length: " + entity.getContentLength()); System.out.println("Login form get: " + response2.getStatusLine()); EntityUtils.consume(entity); System.out.println("Post logon cookies:"); List<Cookie> cookies = cookieStore.getCookies(); if (cookies.isEmpty()) { System.out.println("None"); } else { for (int i = 0; i < cookies.size(); i++) { System.out.println("- " + cookies.get(i).toString()); } } } finally { response2.close(); } httpget = new HttpGet( // HttpUriRequest file = RequestBuilder // .post() // .setUri(new URI( "https://atlbike.nationbuilder.com/admin/membership_types/14/download"); // .build(); // CloseableHttpResponse response3 = httpclient.execute(file); CloseableHttpResponse response3 = httpclient.execute(httpget); try { HttpEntity entity = response3.getEntity(); System.out.println("Content Length: " + entity.getContentLength()); System.out.println("File Get: " + response3.getStatusLine()); saveEntity(entity); // EntityUtils.consume(entity); System.out.println("Post file get cookies:"); List<Cookie> cookies = cookieStore.getCookies(); if (cookies.isEmpty()) { System.out.println("None"); } else { for (int i = 0; i < cookies.size(); i++) { System.out.println("- " + cookies.get(i).toString()); } } } finally { response3.close(); } } finally { httpclient.close(); } }
From source file:module.entities.NameFinder.RegexNameFinder.java
/** * @param args the command line arguments *//*from w ww.ja v a 2 s . c o m*/ public static void main(String[] args) throws SQLException, IOException { if (args.length == 1) { Config.configFile = args[0]; } long lStartTime = System.currentTimeMillis(); Timestamp startTime = new Timestamp(lStartTime); System.out.println("Regex Name Finder process started at: " + startTime); DB.initPostgres(); regexerId = DB.LogRegexFinder(lStartTime); initLexicons(); JSONObject obj = new JSONObject(); TreeMap<Integer, String> consultations = DB.getDemocracitConsultationBody(); Document doc; int count = 0; TreeMap<Integer, String> consFoundNames = new TreeMap<>(); TreeMap<Integer, String> consFoundRoles = new TreeMap<>(); for (int consId : consultations.keySet()) { String consBody = consultations.get(consId); String signName = "", roleName = ""; doc = Jsoup.parse(consBody); Elements allPars = new Elements(); Elements paragraphs = doc.select("p"); for (Element par : paragraphs) { if (par.html().contains("<br>")) { String out = "<p>" + par.html().replaceAll("<br>", "</p><p>") + "</p>"; Document internal_doc = Jsoup.parse(out); Elements subparagraphs = internal_doc.select("p"); allPars.addAll(subparagraphs); } else { allPars.add(par); } // System.out.println(formatedText); } String signature = getSignatureFromParagraphs(allPars); // System.out.println(signature); if (signature.contains("#")) { String[] sign_tokens = signature.split("#"); signName = sign_tokens[0]; if (sign_tokens.length > 1) { roleName = sign_tokens[1]; } consFoundNames.put(consId, signName.trim()); consFoundRoles.put(consId, roleName.trim()); count++; } else { System.err.println("--" + consId); } // } DB.insertDemocracitConsultationMinister(consFoundNames, consFoundRoles); TreeMap<Integer, String> consultationsCompletedText = DB.getDemocracitCompletedConsultationBody(); Document doc2; TreeMap<Integer, String> complConsFoundNames = new TreeMap<>(); int count2 = 0; for (int consId : consultationsCompletedText.keySet()) 
{ String consBody = consultationsCompletedText.get(consId); String signName = "", roleName = ""; doc2 = Jsoup.parse(consBody); // if (doc.text().contains("<br>")) { // doc.text().replaceAll("(<[Bb][Rr]>)+", "<p>"); // } Elements allPars = new Elements(); Elements paragraphs = doc2.select("p"); for (Element par : paragraphs) { if (par.html().contains("<br>")) { String out = "<p>" + par.html().replaceAll("<br>", "</p><p>") + "</p>"; Document internal_doc = Jsoup.parse(out); Elements subparagraphs = internal_doc.select("p"); allPars.addAll(subparagraphs); } else { allPars.add(par); } } String signature = getSignatureFromParagraphs(allPars); if (signature.contains("#")) { String[] sign_tokens = signature.split("#"); signName = sign_tokens[0]; if (sign_tokens.length > 1) { roleName = sign_tokens[1]; } consFoundNames.put(consId, signName.trim()); consFoundRoles.put(consId, roleName.trim()); // System.out.println(consId); // System.out.println(signName.trim()); // System.out.println("***************"); count2++; } else { System.err.println("++" + consId); } } DB.insertDemocracitConsultationMinister(complConsFoundNames, consFoundRoles); long lEndTime = System.currentTimeMillis(); System.out.println("Regex Name Finder process finished at: " + startTime); obj.put("message", "Regex Name Finder finished with no errors"); obj.put("details", ""); DB.UpdateLogRegexFinder(lEndTime, regexerId, obj); DB.close(); }
From source file:isc_415_practica_1.ISC_415_Practica_1.java
/** * @param args the command line arguments *///from w w w . j a v a2s . c o m public static void main(String[] args) { String urlString; Scanner input = new Scanner(System.in); Document doc; try { urlString = input.next(); if (urlString.equals("servlet")) { urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client"; } urlString = urlString.contains("http://") || urlString.contains("https://") ? urlString : "http://" + urlString; doc = Jsoup.connect(urlString).get(); } catch (Exception ex) { System.out.println("El URL ingresado no es valido."); return; } ArrayList<NameValuePair> formInputParams; formInputParams = new ArrayList<>(); String[] plainTextDoc = new TextNode(doc.html(), "").getWholeText().split("\n"); System.out.println(String.format("Nmero de lineas del documento: %d", plainTextDoc.length)); System.out.println(String.format("Nmero de p tags: %d", doc.select("p").size())); System.out.println(String.format("Nmero de img tags: %d", doc.select("img").size())); System.out.println(String.format("Nmero de form tags: %d", doc.select("form").size())); Integer index = 1; ArrayList<NameValuePair> urlParameters = new ArrayList<>(); for (Element e : doc.select("form")) { System.out.println(String.format("Form %d: Nmero de Input tags %d", index, e.select("input").size())); System.out.println(e.select("input")); for (Element formInput : e.select("input")) { if (formInput.attr("id") != null && formInput.attr("id") != "") { urlParameters.add(new BasicNameValuePair(formInput.attr("id"), "PRACTICA1")); } else if (formInput.attr("name") != null && formInput.attr("name") != "") { urlParameters.add(new BasicNameValuePair(formInput.attr("name"), "PRACTICA1")); } } index++; } if (!urlParameters.isEmpty()) { try { CloseableHttpClient httpclient = HttpClients.createDefault(); UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8); HttpPost httpPost = new HttpPost(urlString); httpPost.setHeader("User-Agent", USER_AGENT); 
httpPost.setEntity(entity); HttpResponse response = httpclient.execute(httpPost); System.out.println(response.getStatusLine()); } catch (IOException ex) { Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:Main.java
/**
 * Fetches {@code url} and returns the text of the first element matching {@code tag}.
 *
 * @param url page to fetch
 * @param tag CSS query selecting the element whose text is wanted
 * @return the text of the first match, or an empty string when nothing matches
 * @throws IOException if the page cannot be fetched
 */
public static String getPrivacyNotice(String url, String tag) throws IOException {
    Document doc = Jsoup.connect(url).get();
    // eq(0) keeps at most the first match, and text() on an empty selection is "",
    // so a query with no matches no longer ends in a NullPointerException.
    return doc.select(tag).eq(0).text();
}
From source file:Main.java
public static void parseHtml(String html) { Document document = Jsoup.parse(html); Element linkElement = document.select("a").first(); String linkHref = linkElement.attr("href"); // "http://sample.com" String linkText = linkElement.text(); // "This is sample" System.out.println(linkHref); System.out.println(linkText); }