List of usage examples for org.jsoup.nodes.Element.val()
public String val()
From source file:edu.uci.ics.crawler4j.examples.login.LoginCrawlController.java
public static void main(String[] args) throws Exception { // if (args.length != 2) { // System.out.println("Needed parameters: "); // System.out.println("\t rootFolder (it will contain intermediate crawl data)"); // System.out.println("\t numberOfCralwers (number of concurrent threads)"); // return; // }// w w w . j ava2 s. com /* * crawlStorageFolder is a folder where intermediate crawl data is * stored. */ String crawlStorageFolder = "/tmp/test_crawler/"; /* * numberOfCrawlers shows the number of concurrent threads that should * be initiated for crawling. */ int numberOfCrawlers = 1; CrawlConfig config = new CrawlConfig(); config.setCrawlStorageFolder(crawlStorageFolder); /* * Be polite: Make sure that we don't send more than 1 request per * second (1000 milliseconds between requests). */ config.setPolitenessDelay(1000); /* * You can set the maximum crawl depth here. The default value is -1 for * unlimited depth */ config.setMaxDepthOfCrawling(0); /* * You can set the maximum number of pages to crawl. The default value * is -1 for unlimited number of pages */ config.setMaxPagesToFetch(1000); /* * Do you need to set a proxy? If so, you can use: * config.setProxyHost("proxyserver.example.com"); * config.setProxyPort(8080); * * If your proxy also needs authentication: * config.setProxyUsername(username); config.getProxyPassword(password); */ /* * This config parameter can be used to set your crawl to be resumable * (meaning that you can resume the crawl from a previously * interrupted/crashed crawl). Note: if you enable resuming feature and * want to start a fresh crawl, you need to delete the contents of * rootFolder manually. 
*/ config.setResumableCrawling(false); config.setIncludeHttpsPages(true); HttpClient client = new DefaultHttpClient(); HttpResponse response = client.execute(new HttpGet("http://58921.com/user/login")); HttpEntity entity = response.getEntity(); String content = EntityUtils.toString(entity, HTTP.UTF_8); Document doc = Jsoup.parse(content); Elements elements = doc.getElementById("user_login_form").children(); Element tokenEle = elements.last(); String token = tokenEle.val(); System.out.println(token); LoginConfiguration somesite; try { somesite = new LoginConfiguration("58921.com", new URL("http://58921.com/user/login"), new URL("http://58921.com/user/login/ajax?ajax=submit&__q=user/login")); somesite.addParam("form_id", "user_login_form"); somesite.addParam("mail", "paxbeijing@gmail.com"); somesite.addParam("pass", "cetas123"); somesite.addParam("submit", ""); somesite.addParam("form_token", token); config.addLoginConfiguration(somesite); } catch (MalformedURLException e) { e.printStackTrace(); } /* * Instantiate the controller for this crawl. */ PageFetcher pageFetcher = new PageFetcher(config); RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); robotstxtConfig.setEnabled(false); RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); /* * For each crawl, you need to add some seed urls. These are the first * URLs that are fetched and then the crawler starts following links * which are found in these pages */ controller.addSeed("http://58921.com/alltime?page=60"); /* * Start the crawl. This is a blocking operation, meaning that your code * will reach the line after this only when crawling is finished. */ controller.start(LoginCrawler.class, numberOfCrawlers); controller.env.close(); }
From source file:org.keycloak.testsuite.util.SamlClient.java
/** * Extracts and parses value of SAMLResponse input field of a form present in the given page. * * @param responsePage HTML code of the page * @return/*from ww w . ja v a2 s . com*/ */ public static SAMLDocumentHolder extractSamlResponseFromForm(String responsePage) { org.jsoup.nodes.Document theResponsePage = Jsoup.parse(responsePage); Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]"); Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]"); int size = samlResponses.size() + samlRequests.size(); assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1)); Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first(); return SAMLRequestParser.parseResponsePostBinding(respElement.val()); }
From source file:org.wso2.carbon.appmgt.sampledeployer.main.ApplicationPublisher.java
private static void accsesWebPages(String webContext, String trackingCode, int hitCount) { String loginHtmlPage = null;//from www . j a va 2s .c o m String webAppurl = "http://" + ipAddress + ":8280" + webContext + "/1.0.0/"; String responceHtml = null; try { loginHtmlPage = httpHandler.getHtml(webAppurl); Document html = Jsoup.parse(loginHtmlPage); Element something = html.select("input[name=sessionDataKey]").first(); String sessionDataKey = something.val(); responceHtml = httpHandler.doPostHttps(backEndUrl + "/commonauth", "username=admin&password=admin&sessionDataKey=" + sessionDataKey, "none", "application/x-www-form-urlencoded; charset=UTF-8"); Document postHtml = Jsoup.parse(responceHtml); Element postHTMLResponse = postHtml.select("input[name=SAMLResponse]").first(); String samlResponse = postHTMLResponse.val(); String appmSamlSsoTokenId = httpHandler.doPostHttp(webAppurl, "SAMLResponse=" + URLEncoder.encode(samlResponse, "UTF-8"), "appmSamlSsoTokenId", "application/x-www-form-urlencoded; charset=UTF-8"); for (int i = 0; i < hitCount; i++) { if (webContext.equals("/notifi")) { if (i == hitCount / 5) { webAppurl += "member/"; } else if (i == hitCount / 2) { webAppurl = appendPageToUrl("admin", webAppurl, false); } } else if (webContext.equals("/travelBooking")) { if (i == hitCount / 5) { webAppurl = appendPageToUrl("booking-step1.jsp", webAppurl, true); } else if (i == hitCount / 2) { webAppurl = appendPageToUrl("booking-step2.jsp", webAppurl, false); } } httpHandler.doGet("http://" + ipAddress + ":8280/statistics/", trackingCode, appmSamlSsoTokenId, webAppurl); log.info("Web Page : " + webAppurl + " Hit count : " + i); try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } }
From source file:org.keycloak.testsuite.util.saml.LoginBuilder.java
/**
 * Builds the request that submits the first form found in the given login page.
 *
 * <p>The input with id {@code username} is filled with the user's username, the input with id
 * {@code password} with the user's password, and every other input keeps its own value. A form
 * whose method is {@code post} (case-insensitive) yields an {@code HttpPost} with a UTF-8
 * URL-encoded body; any other method yields an {@code HttpGet} with the fields as query
 * parameters.
 *
 * @param user user whose credentials are entered into the form
 * @param loginPage HTML code of the login page
 * @return the request submitting the form to its {@code action}
 * @throws IllegalArgumentException if the page contains no form
 */
public static HttpUriRequest handleLoginPage(UserRepresentation user, String loginPage) {
    String username = user.getUsername();
    String password = getPasswordOf(user);
    org.jsoup.nodes.Document page = Jsoup.parse(loginPage);

    List<NameValuePair> formFields = new LinkedList<>();
    for (Element form : page.getElementsByTag("form")) {
        String action = form.attr("action");
        boolean submitAsPost = "post".equalsIgnoreCase(form.attr("method"));

        for (Element input : form.getElementsByTag("input")) {
            String inputId = input.id();
            String fieldValue;
            if ("username".equals(inputId)) {
                fieldValue = username;
            } else if ("password".equals(inputId)) {
                fieldValue = password;
            } else {
                fieldValue = input.val();
            }
            formFields.add(new BasicNameValuePair(input.attr("name"), fieldValue));
        }

        if (!submitAsPost) {
            UriBuilder uri = UriBuilder.fromPath(action);
            for (NameValuePair field : formFields) {
                uri.queryParam(field.getName(), field.getValue());
            }
            return new HttpGet(uri.build());
        }

        HttpPost post = new HttpPost(action);
        UrlEncodedFormEntity body;
        try {
            body = new UrlEncodedFormEntity(formFields, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        post.setEntity(body);
        return post;
    }

    // Reached only when the page has no <form> element at all.
    throw new IllegalArgumentException("Invalid login form: " + loginPage);
}
From source file:de.geeksfactory.opacclient.apis.IOpac.java
static void parseMediaList(List<LentItem> media, Document doc, JSONObject data) { if (doc.select("a[name=AUS]").size() == 0) return;/*from w w w . j a v a 2 s . c o m*/ Elements copytrs = doc.select("a[name=AUS] ~ table, a[name=AUS] ~ form table").first().select("tr"); doc.setBaseUri(data.optString("baseurl")); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN); int trs = copytrs.size(); if (trs < 2) { return; } assert (trs > 0); JSONObject copymap = new JSONObject(); try { if (data.has("accounttable")) { copymap = data.getJSONObject("accounttable"); } } catch (JSONException e) { } Pattern datePattern = Pattern.compile("\\d{2}\\.\\d{2}\\.\\d{4}"); for (int i = 1; i < trs; i++) { Element tr = copytrs.get(i); LentItem item = new LentItem(); if (copymap.optInt("title", 0) >= 0) { item.setTitle(tr.child(copymap.optInt("title", 0)).text().trim().replace("\u00a0", "")); } if (copymap.optInt("author", 1) >= 0) { item.setAuthor(tr.child(copymap.optInt("author", 1)).text().trim().replace("\u00a0", "")); } if (copymap.optInt("format", 2) >= 0) { item.setFormat(tr.child(copymap.optInt("format", 2)).text().trim().replace("\u00a0", "")); } int prolongCount = 0; if (copymap.optInt("prolongcount", 3) >= 0) { prolongCount = Integer .parseInt(tr.child(copymap.optInt("prolongcount", 3)).text().trim().replace("\u00a0", "")); item.setStatus(String.valueOf(prolongCount) + "x verl."); } if (data.optInt("maxprolongcount", -1) != -1) { item.setRenewable(prolongCount < data.optInt("maxprolongcount", -1)); } if (copymap.optInt("returndate", 4) >= 0) { String value = tr.child(copymap.optInt("returndate", 4)).text().trim().replace("\u00a0", ""); Matcher matcher = datePattern.matcher(value); if (matcher.find()) { try { item.setDeadline(fmt.parseLocalDate(matcher.group())); } catch (IllegalArgumentException e1) { e1.printStackTrace(); } } } if (copymap.optInt("prolongurl", 5) >= 0) { if (tr.children().size() > copymap.optInt("prolongurl", 5)) { Element 
cell = tr.child(copymap.optInt("prolongurl", 5)); if (cell.select("input[name=MedNrVerlAll]").size() > 0) { // new iOPAC Version 1.45 - checkboxes to prolong multiple items // internal convention: We add "NEW" to the media ID to show that we have // the new iOPAC version Element input = cell.select("input[name=MedNrVerlAll]").first(); String value = input.val(); item.setProlongData("NEW" + value); item.setId(value.split(";")[0]); if (input.hasAttr("disabled")) item.setRenewable(false); } else { // previous versions - link for prolonging on every medium String link = cell.select("a").attr("href"); item.setProlongData(link); // find media number with regex Pattern pattern = Pattern.compile("mednr=([^&]*)&"); Matcher matcher = pattern.matcher(link); if (matcher.find() && matcher.group() != null) item.setId(matcher.group(1)); } } } media.add(item); } assert (media.size() == trs - 1); }
From source file:com.daiv.android.twitter.services.TrimDataService.java
public String getVersion() { try {// www. j a v a 2 s . co m Document doc = getDoc(); if (doc != null) { Elements elements = doc.getElementsByAttributeValue("itemprop", "softwareVersion"); Log.v("Test_version", "elements size: " + elements.size()); for (Element e : elements) { Log.v("Test_version", e.val()); return e.val(); } } } catch (Exception e) { e.printStackTrace(); } catch (OutOfMemoryError e) { e.printStackTrace(); } return null; }
From source file:com.klinker.android.twitter.services.TrimDataService.java
public String getVersion() { try {//from w w w. j a v a2s .co m Document doc = getDoc(); if (doc != null) { Elements elements = doc.getElementsByAttributeValue("itemprop", "softwareVersion"); Log.v("talon_version", "elements size: " + elements.size()); for (Element e : elements) { Log.v("talon_version", e.val()); return e.val(); } } } catch (Exception e) { e.printStackTrace(); } catch (OutOfMemoryError e) { e.printStackTrace(); } return null; }
From source file:com.liato.bankdroid.banking.banks.AbsIkanoPartner.java
/**
 * Opens the Ikano partner login page and assembles the form data needed to log in.
 *
 * <p>The hidden {@code __VIEWSTATE} and {@code __EVENTVALIDATION} values are copied from the
 * page, and the user, password and submit fields are located inside {@code #LoginSpan} and
 * {@code #LoginCustomerDiv}.
 *
 * @return the login package holding the HTTP client, the POST data, the page content and the
 *     login URL
 * @throws BankException if a required hidden field or login field is missing from the page
 * @throws ClientProtocolException declared by the signature; the visible calls do not show
 *     where it originates
 * @throws IOException declared by the signature; the visible calls do not show where it
 *     originates
 */
@Override
protected LoginPackage preLogin() throws BankException, ClientProtocolException, IOException {
    urlopen = new Urllib(context, CertificateReader.getCertificates(context, R.raw.cert_ikanopartner));
    String loginUrl = "https://partner.ikanobank.se/web/engines/page.aspx?structid=" + structId;
    response = urlopen.open(loginUrl);
    Document page = Jsoup.parse(response);

    Element viewStateField = page.getElementById("__VIEWSTATE");
    String viewState = (viewStateField == null) ? null : viewStateField.val();
    if (TextUtils.isEmpty(viewState)) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " ViewState.");
    }

    Element eventValidationField = page.getElementById("__EVENTVALIDATION");
    String eventValidation = (eventValidationField == null) ? null : eventValidationField.val();
    if (TextUtils.isEmpty(eventValidation)) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " EventValidation.");
    }

    Element userInput = page.select("#LoginSpan input[type=text]").first();
    Element passwordInput = page.select("#LoginSpan input[type=password]").first();
    Element submitInput = page.select("#LoginCustomerDiv input[type=submit]").first();
    boolean allFieldsPresent = userInput != null && passwordInput != null && submitInput != null;
    if (!allFieldsPresent) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " login fields.");
    }

    List<NameValuePair> formData = new ArrayList<NameValuePair>();
    formData.add(new BasicNameValuePair("__VIEWSTATE", viewState));
    formData.add(new BasicNameValuePair("__EVENTVALIDATION", eventValidation));
    formData.add(new BasicNameValuePair(userInput.attr("name"), username));
    formData.add(new BasicNameValuePair(passwordInput.attr("name"), password));
    formData.add(new BasicNameValuePair(submitInput.attr("name"), submitInput.val()));

    return new LoginPackage(urlopen, formData, response, loginUrl);
}
From source file:org.apache.sling.hapi.client.forms.internal.FormValues.java
/** * @return//from w w w .j a v a2 s. c o m * {@see http://www.w3.org/TR/html5/forms.html#constructing-the-form-data-set} */ private FormValues build() { for (Element input : form.select("button, input, select, textarea")) { String type = input.attr("type"); if (input.hasAttr("disabled")) continue; if (input.tagName().equalsIgnoreCase("button") && !type.equals("submit")) continue; if (input.tagName().equalsIgnoreCase("input") && (type.equals("button") || type.equals("reset"))) continue; if (type.equals("checkbox") && input.hasAttr("checked")) continue; if (type.equals("radio") && input.hasAttr("checked")) continue; if (!type.equals("image") && input.attr("name").length() == 0) continue; if (input.parents().is("datalist")) continue; if (type.equals("image") || type.equals("file")) continue; // don't support files for now String name = input.attr("name"); if (input.tagName().equalsIgnoreCase("select")) { for (Element o : input.select("option[selected]")) { if (o.hasAttr("disabled")) continue; list.add(name, new BasicNameValuePair(name, o.val())); } } else if (type.equals("checkbox") || type.equals("radio")) { String value = input.hasAttr("value") ? input.val() : "on"; list.add(name, new BasicNameValuePair(name, value)); } else { list.add(name, new BasicNameValuePair(name, input.val())); } } return this; }
From source file:org.keycloak.testsuite.util.saml.ModifySamlResponseStepBuilder.java
/**
 * Reads the SAML document posted in the given response page, passes it through the configured
 * transformer and builds the request that forwards the transformed document.
 *
 * <p>The page must have status OK, contain exactly one {@code SAMLResponse} or
 * {@code SAMLRequest} input and exactly one form. The request goes to {@code targetUri} if set,
 * otherwise to the form's {@code action}; the document is sent under {@code targetAttribute}
 * if set, otherwise under the name of the original input. A {@code RelayState} input, if
 * present, is forwarded as well.
 *
 * @param currentResponse response carrying the HTML page with the POST-binding form
 * @return the request created by {@code createRequest}, or {@code null} if the transformer
 *     returned {@code null}
 * @throws Exception if reading the response body or the decoded SAML document fails; the
 *     visible code does not show what the transformer or {@code createRequest} may throw
 */
private HttpUriRequest handlePostBinding(CloseableHttpResponse currentResponse) throws Exception {
    assertThat(currentResponse, statusCodeIsHC(Status.OK));

    final String htmlBody = EntityUtils.toString(currentResponse.getEntity());
    assertThat(htmlBody, Matchers.containsString("SAML"));

    org.jsoup.nodes.Document theResponsePage = Jsoup.parse(htmlBody);
    Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]");
    Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]");
    Elements forms = theResponsePage.select("form");
    Elements relayStates = theResponsePage.select("input[name=RelayState]");

    int size = samlResponses.size() + samlRequests.size();
    assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1));
    assertThat("Checking uniqueness of forms in the page", forms, hasSize(1));

    Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first();
    Element form = forms.first();

    String base64EncodedSamlDoc = respElement.val();
    InputStream decoded = PostBindingUtil.base64DecodeAsStream(base64EncodedSamlDoc);
    String samlDoc;
    try {
        samlDoc = IOUtils.toString(decoded, GeneralConstants.SAML_CHARSET);
    } finally {
        // Close the stream even when reading it fails.
        IOUtils.closeQuietly(decoded);
    }

    String transformed = getTransformer().transform(samlDoc);
    if (transformed == null) {
        return null;
    }

    final String attributeName = this.targetAttribute != null ? this.targetAttribute : respElement.attr("name");
    List<NameValuePair> parameters = new LinkedList<>();

    if (!relayStates.isEmpty()) {
        parameters.add(new BasicNameValuePair(GeneralConstants.RELAY_STATE, relayStates.first().val()));
    }

    URI locationUri = this.targetUri != null ? this.targetUri : URI.create(form.attr("action"));

    return createRequest(locationUri, attributeName, transformed, parameters);
}