List of usage examples for org.jsoup.nodes Document getElementById
public Element getElementById(String id)
From source file:com.rest.samples.getTipoCambioBanxico.java
public static void main(String[] args) { String url = "http://www.banxico.org.mx/tipcamb/llenarTiposCambioAction.do?idioma=sp"; try {//w ww . jav a 2 s .c om HttpClient hc = HttpClientBuilder.create().build(); HttpGet request = new HttpGet(url); request.setHeader("User-Agent", "Mozilla/5.0"); request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); HttpResponse res = hc.execute(request); if (res.getStatusLine().getStatusCode() != 200) { throw new RuntimeException("Failed : HTTP eror code: " + res.getStatusLine().getStatusCode()); } BufferedReader rd = new BufferedReader(new InputStreamReader(res.getEntity().getContent())); StringBuffer result = new StringBuffer(); String line = ""; while ((line = rd.readLine()) != null) { result.append(line); } Document doc = Jsoup.parse(result.toString()); Element tipoCambioFix = doc.getElementById("FIX_DATO"); System.out.println(tipoCambioFix.text()); } catch (IOException ex) { Logger.getLogger(SamplesUseHttpclient.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:edu.uci.ics.crawler4j.examples.login.LoginCrawlController.java
public static void main(String[] args) throws Exception { // if (args.length != 2) { // System.out.println("Needed parameters: "); // System.out.println("\t rootFolder (it will contain intermediate crawl data)"); // System.out.println("\t numberOfCralwers (number of concurrent threads)"); // return; // }//w ww . j a va 2 s.c o m /* * crawlStorageFolder is a folder where intermediate crawl data is * stored. */ String crawlStorageFolder = "/tmp/test_crawler/"; /* * numberOfCrawlers shows the number of concurrent threads that should * be initiated for crawling. */ int numberOfCrawlers = 1; CrawlConfig config = new CrawlConfig(); config.setCrawlStorageFolder(crawlStorageFolder); /* * Be polite: Make sure that we don't send more than 1 request per * second (1000 milliseconds between requests). */ config.setPolitenessDelay(1000); /* * You can set the maximum crawl depth here. The default value is -1 for * unlimited depth */ config.setMaxDepthOfCrawling(0); /* * You can set the maximum number of pages to crawl. The default value * is -1 for unlimited number of pages */ config.setMaxPagesToFetch(1000); /* * Do you need to set a proxy? If so, you can use: * config.setProxyHost("proxyserver.example.com"); * config.setProxyPort(8080); * * If your proxy also needs authentication: * config.setProxyUsername(username); config.getProxyPassword(password); */ /* * This config parameter can be used to set your crawl to be resumable * (meaning that you can resume the crawl from a previously * interrupted/crashed crawl). Note: if you enable resuming feature and * want to start a fresh crawl, you need to delete the contents of * rootFolder manually. */ config.setResumableCrawling(false); config.setIncludeHttpsPages(true); HttpClient client = new DefaultHttpClient(); HttpResponse response = client.execute(new HttpGet("http://58921.com/user/login")); HttpEntity entity = response.getEntity(); String content = EntityUtils.toString(entity, HTTP.UTF_8); Document doc = Jsoup.parse(content); Elements elements = doc.getElementById("user_login_form").children(); Element tokenEle = elements.last(); String token = tokenEle.val(); System.out.println(token); LoginConfiguration somesite; try { somesite = new LoginConfiguration("58921.com", new URL("http://58921.com/user/login"), new URL("http://58921.com/user/login/ajax?ajax=submit&__q=user/login")); somesite.addParam("form_id", "user_login_form"); somesite.addParam("mail", "paxbeijing@gmail.com"); somesite.addParam("pass", "cetas123"); somesite.addParam("submit", ""); somesite.addParam("form_token", token); config.addLoginConfiguration(somesite); } catch (MalformedURLException e) { e.printStackTrace(); } /* * Instantiate the controller for this crawl. */ PageFetcher pageFetcher = new PageFetcher(config); RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); robotstxtConfig.setEnabled(false); RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); /* * For each crawl, you need to add some seed urls. These are the first * URLs that are fetched and then the crawler starts following links * which are found in these pages */ controller.addSeed("http://58921.com/alltime?page=60"); /* * Start the crawl. This is a blocking operation, meaning that your code * will reach the line after this only when crawling is finished. */ controller.start(LoginCrawler.class, numberOfCrawlers); controller.env.close(); }
From source file:Main.java
public static Element getElement(Document doc, String id) { return doc.getElementById(id); }
From source file:io.github.carlomicieli.footballdb.starter.pages.TableBuilderTests.java
private static Element notTableElement() { String html = "<div id=\"id\">ooo</div>"; Document doc = JsoupHelper.fromHtml(html); return doc.getElementById("id"); }
From source file:io.github.carlomicieli.footballdb.starter.pages.TableBuilderTests.java
private static Element tableElement() { String html = "<table id=\"my-table\">" + "<tr><td>1</td><td>one</td><td>3</td></tr>" + "<tr><td colspan=\"3\"></td></tr>" + "<tr><td>2</td><td>two</td><td>1</td></tr>" + "</table>"; Document doc = JsoupHelper.fromHtml(html); return doc.getElementById("my-table"); }
From source file:com.feilong.tools.jsoup.JsoupUtil.java
/** * getElementById.//from ww w. j av a 2 s. c o m * * @param url * the url * @param id * the id * @return getElementById * @throws JsoupUtilException * the jsoup util exception * @see #getDocument(String) * @see org.jsoup.nodes.Element#getElementById(String) */ public static Element getElementById(String url, String id) throws JsoupUtilException { Validate.notEmpty(url); Validate.notEmpty(id); Document document = getDocument(url); Element element = document.getElementById(id); return element; }
From source file:com.shareplaylearn.OauthPasswordFlow.java
public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri) throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException { CloseableHttpClient httpClient = HttpClients.custom().build(); String oAuthQuery = "client_id=" + clientId + "&"; oAuthQuery += "response_type=code&"; oAuthQuery += "scope=openid email&"; oAuthQuery += "redirect_uri=" + callbackUri; URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null); Connection oauthGetCoonnection = Jsoup.connect(oAuthUrl.toString()); Connection.Response oauthResponse = oauthGetCoonnection.method(Connection.Method.GET).execute(); if (oauthResponse.statusCode() != 200) { String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); }//from www.j ava2s . c o m throw new AuthorizationException(errorMessage); } Map<String, String> oauthCookies = oauthResponse.cookies(); Document oauthPage = oauthResponse.parse(); Element oauthForm = oauthPage.getElementById("gaia_loginform"); System.out.println(oauthForm.toString()); Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth"); HashMap<String, String> formParams = new HashMap<>(); for (Element child : oauthForm.children()) { System.out.println("Tag name: " + child.tagName()); System.out.println("attrs: " + Arrays.toString(child.attributes().asList().toArray())); if (child.tagName().equals("input") && child.hasAttr("name")) { String keyName = child.attr("name"); String keyValue = null; if (child.hasAttr("value")) { keyValue = child.attr("value"); } if (keyName != null && keyName.trim().length() != 0 && keyValue != null && keyValue.trim().length() != 0) { oauthPostConnection.data(keyName, keyValue); formParams.put(keyName, keyValue); } } } oauthPostConnection.cookies(oauthCookies); formParams.put("Email", username); formParams.put("Passwd-hidden", password); //oauthPostConnection.followRedirects(false); System.out.println("form post params were: "); for (Map.Entry<String, String> kvp : formParams.entrySet()) { //DO NOT let passwords end up in the logs ;) if (kvp.getKey().equals("Passwd")) { continue; } System.out.println(kvp.getKey() + "," + kvp.getValue()); } System.out.println("form cookies were: "); for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) { System.out.println(cookie.getKey() + "," + cookie.getValue()); } //System.exit(0); Connection.Response postResponse = null; try { postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute(); } catch (Throwable t) { System.out.println("Failed to post login information to googles endpoint :/ " + t.getMessage()); System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or " + " google is being a punk - login manually and check."); assertTrue(false); } if (postResponse.statusCode() != 200) { String errorMessage = "Failed to validate credentials: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); } throw new UnauthorizedException(errorMessage); } System.out.println("Response headers (after post to google form & following redirect):"); for (Map.Entry<String, String> header : postResponse.headers().entrySet()) { System.out.println(header.getKey() + "," + header.getValue()); } System.out.println("Final response url was: " + postResponse.url().toString()); String[] args = postResponse.url().toString().split("&"); LoginInfo loginInfo = new LoginInfo(); for (String arg : args) { if (arg.startsWith("access_token")) { loginInfo.accessToken = arg.split("=")[1].trim(); } else if (arg.startsWith("id_token")) { loginInfo.idToken = arg.split("=")[1].trim(); } else if (arg.startsWith("expires_in")) { loginInfo.expiry = arg.split("=")[1].trim(); } } //Google doesn't actually throw a 401 or anything - it just doesn't redirect //and sends you back to it's login page to try again. //So this is what happens with an invalid password. if (loginInfo.accessToken == null || loginInfo.idToken == null) { //Document oauthPostResponse = postResponse.parse(); //System.out.println("*** Oauth response from google *** "); //System.out.println(oauthPostResponse.toString()); throw new UnauthorizedException( "Error retrieving authorization: did you use the correct username/password?"); } String[] idTokenFields = loginInfo.idToken.split("\\."); if (idTokenFields.length < 3) { throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had " + idTokenFields.length + " field!"); } String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8); loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class); loginInfo.id = loginInfo.idTokenBody.sub; return loginInfo; }
From source file:com.shareplaylearn.utilities.OauthPasswordFlow.java
public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri) throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException { CloseableHttpClient httpClient = HttpClients.custom().build(); String oAuthQuery = "client_id=" + clientId + "&"; oAuthQuery += "response_type=code&"; oAuthQuery += "scope=openid email&"; oAuthQuery += "redirect_uri=" + callbackUri; URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null); Connection oauthGetCoonnection = Jsoup.connect(oAuthUrl.toString()); Connection.Response oauthResponse = oauthGetCoonnection.method(Connection.Method.GET).execute(); if (oauthResponse.statusCode() != 200) { String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); }/*from w ww. ja v a2 s.co m*/ throw new AuthorizationException(errorMessage); } Map<String, String> oauthCookies = oauthResponse.cookies(); Document oauthPage = oauthResponse.parse(); Element oauthForm = oauthPage.getElementById("gaia_loginform"); System.out.println(oauthForm.toString()); Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth"); HashMap<String, String> formParams = new HashMap<>(); for (Element child : oauthForm.children()) { if (child.tagName().equals("input") && child.hasAttr("name")) { String keyName = child.attr("name"); String keyValue = null; if (keyName.equals("Email")) { keyValue = username; } else if (keyName.equals("Passwd")) { keyValue = password; } else if (child.hasAttr("value")) { keyValue = child.attr("value"); } if (keyValue != null) { oauthPostConnection.data(keyName, keyValue); formParams.put(keyName, keyValue); } } } oauthPostConnection.cookies(oauthCookies); //oauthPostConnection.followRedirects(false); System.out.println("form post params were: "); for (Map.Entry<String, String> kvp : formParams.entrySet()) { //DO NOT let passwords end up in the logs ;) if (kvp.getKey().equals("Passwd")) { continue; } System.out.println(kvp.getKey() + "," + kvp.getValue()); } System.out.println("form cookies were: "); for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) { System.out.println(cookie.getKey() + "," + cookie.getValue()); } Connection.Response postResponse = null; try { postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute(); } catch (Throwable t) { System.out.println("Failed to post login information to googles endpoint :/ " + t.getMessage()); System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or " + " google is being a punk - login manually and check."); assertTrue(false); } if (postResponse.statusCode() != 200) { String errorMessage = "Failed to validate credentials: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); } throw new UnauthorizedException(errorMessage); } System.out.println("Response headers (after post to google form & following redirect):"); for (Map.Entry<String, String> header : postResponse.headers().entrySet()) { System.out.println(header.getKey() + "," + header.getValue()); } System.out.println("Final response url was: " + postResponse.url().toString()); String[] args = postResponse.url().toString().split("&"); LoginInfo loginInfo = new LoginInfo(); for (String arg : args) { if (arg.startsWith("access_token")) { loginInfo.accessToken = arg.split("=")[1].trim(); } else if (arg.startsWith("id_token")) { loginInfo.idToken = arg.split("=")[1].trim(); } else if (arg.startsWith("expires_in")) { loginInfo.expiry = arg.split("=")[1].trim(); } } //Google doesn't actually throw a 401 or anything - it just doesn't redirect //and sends you back to it's login page to try again. //So this is what happens with an invalid password. if (loginInfo.accessToken == null || loginInfo.idToken == null) { //Document oauthPostResponse = postResponse.parse(); //System.out.println("*** Oauth response from google *** "); //System.out.println(oauthPostResponse.toString()); throw new UnauthorizedException( "Error retrieving authorization: did you use the correct username/password?"); } String[] idTokenFields = loginInfo.idToken.split("\\."); if (idTokenFields.length < 3) { throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had " + idTokenFields.length + " field!"); } String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8); loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class); loginInfo.id = loginInfo.idTokenBody.sub; return loginInfo; }
From source file:com.adavr.player.media.XVClient.java
private Map<String, String> getFlashVars(String url) throws ClientException { client.get(url);//from w w w .j a v a 2s. c om Document document = client.getJsoupDocument(); Element player = document.getElementById("player"); Elements candidates = player.getElementsByAttribute("flashvars"); if (candidates.isEmpty()) { return null; } return parseFlashVars(candidates.get(0).attr("flashvars")); }
From source file:io.github.carlomicieli.footballdb.starter.parsers.SeasonGamesParser.java
private Optional<Element> gamesTable(Document doc) { return Optional.ofNullable(doc.getElementById("games")); }