Usage examples for org.jsoup.nodes.Element.getElementsByAttributeValue
public Elements getElementsByAttributeValue(String key, String value)
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * must take html from [Domain].portalAddress or [Domain].portalAddress/myclasses *///from ww w . jav a 2s . c om public static String[] passthroughCredentials(String html) { Element doc = Jsoup.parse(html); String uT = doc.getElementsByAttributeValue("name", "uT").attr("value"); String uID = doc.getElementsByAttributeValue("name", "uID").attr("value"); return new String[] { uT, uID }; }
From source file:app.sunstreak.yourpisd.net.Parser.java
/**
 * Reads the gradebook login fields from a page.
 *
 * @param html markup of the page to read
 * @return a two-element array holding the {@code value} attribute read from the
 *         elements named {@code userId} and {@code password}, in that order
 */
public static String[] getGradebookCredentials(String html) {
    Element page = Jsoup.parse(html);
    String userId = page.getElementsByAttributeValue("name", "userId").attr("value");
    String password = page.getElementsByAttributeValue("name", "password").attr("value");
    return new String[] { userId, password };
}
From source file:app.sunstreak.yourpisd.net.Parser.java
public static JSONArray detailedReport(String html) throws JSONException { Element doc = Jsoup.parse(html); // System.out.println(html); Element assignments = doc.getElementsByAttributeValue("id", "Assignments").get(0); Elements tableRows = assignments.getElementsByTag("tbody").get(0).getElementsByTag("tr"); JSONArray grades = new JSONArray(); for (Element tr : tableRows) { JSONObject assignment = new JSONObject(); Elements columns = tr.getElementsByTag("td"); for (int i = 0; i < columns.size(); i++) { String value = columns.get(i).text(); // do not store empty values! if (value.equals("")) continue; // first try to cast as int. try { assignment.putOpt(assignmentTableHeader(i), Integer.parseInt(value)); // if not int, try double } catch (NumberFormatException e) { try { assignment.putOpt(assignmentTableHeader(i), Double.parseDouble(value)); // if not double, use string } catch (NumberFormatException f) { assignment.putOpt(assignmentTableHeader(i), value); }//from w w w.j ava 2s. com } } String assignmentDetailLink = tr.getElementsByTag("a").get(0).attr("href"); Matcher matcher = Pattern.compile(".+" + "assignmentId=(\\d+)" + "&H=S" + "&GradebookId=(\\d+)" + "&TermId=\\d+" + "&StudentId=\\d+&").matcher(assignmentDetailLink); matcher.find(); int assignmentId = Integer.parseInt(matcher.group(1)); int gradebookId = Integer.parseInt(matcher.group(2)); assignment.put("assignmentId", assignmentId); assignment.put("gradebookId", gradebookId); grades.put(assignment); } // System.out.println((grades)); return grades; }
From source file:com.screenslicer.core.util.BrowserUtil.java
private static WebElement toElement(Browser browser, HtmlNode htmlNode, Element body, boolean recurse) throws ActionFailed { if (body == null) { body = BrowserUtil.openElement(browser, true, null, null, null, null); }//from w w w .ja v a 2 s. com if (!CommonUtil.isEmpty(htmlNode.id)) { Elements elements = body.getElementsByAttributeValue("id", htmlNode.id); if (elements.size() == 1) { WebElement element = toElement(browser, elements.get(0), htmlNode, recurse); if (element != null) { return element; } } } List<Elements> selected = new ArrayList<Elements>(); if (!CommonUtil.isEmpty(htmlNode.tagName)) { selected.add(body.getElementsByTag(htmlNode.tagName)); } else if (!CommonUtil.isEmpty(htmlNode.href)) { selected.add(body.getElementsByTag("a")); } if (!CommonUtil.isEmpty(htmlNode.id)) { selected.add(body.getElementsByAttributeValue("id", htmlNode.id)); } if (!CommonUtil.isEmpty(htmlNode.name)) { selected.add(body.getElementsByAttributeValue("name", htmlNode.name)); } if (!CommonUtil.isEmpty(htmlNode.type)) { selected.add(body.getElementsByAttributeValue("type", htmlNode.type)); } if (!CommonUtil.isEmpty(htmlNode.value)) { selected.add(body.getElementsByAttributeValue("value", htmlNode.value)); } if (!CommonUtil.isEmpty(htmlNode.title)) { selected.add(body.getElementsByAttributeValue("title", htmlNode.title)); } if (!CommonUtil.isEmpty(htmlNode.role)) { selected.add(body.getElementsByAttributeValue("role", htmlNode.role)); } if (!CommonUtil.isEmpty(htmlNode.alt)) { selected.add(body.getElementsByAttributeValue("alt", htmlNode.alt)); } if (htmlNode.classes != null && htmlNode.classes.length > 0) { Map<Element, Integer> found = new HashMap<Element, Integer>(); for (int i = 0; i < htmlNode.classes.length; i++) { Elements elements = body.getElementsByClass(htmlNode.classes[i]); for (Element element : elements) { if (!found.containsKey(element)) { found.put(element, 0); } found.put(element, found.get(element) + 1); } } Elements elements = new Elements(); for (int i = 
htmlNode.classes.length; i > 0; i--) { for (Map.Entry<Element, Integer> entry : found.entrySet()) { if (entry.getValue() == i) { elements.add(entry.getKey()); } } if (!elements.isEmpty()) { break; } } selected.add(elements); } if (!CommonUtil.isEmpty(htmlNode.href)) { Elements hrefs = body.getElementsByAttribute("href"); Elements toAdd = new Elements(); String currentUrl = browser.getCurrentUrl(); String hrefGiven = htmlNode.href; for (Element href : hrefs) { String hrefFound = href.attr("href"); if (hrefGiven.equalsIgnoreCase(hrefFound)) { toAdd.add(href); toAdd.add(href); toAdd.add(href); } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.endsWith(hrefGiven)) { toAdd.add(href); toAdd.add(href); } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.contains(hrefGiven)) { toAdd.add(href); } else { String uriGiven = UrlUtil.toCanonicalUri(currentUrl, hrefGiven); String uriFound = UrlUtil.toCanonicalUri(currentUrl, hrefFound); if (uriGiven.equalsIgnoreCase(uriFound)) { toAdd.add(href); } } } selected.add(toAdd); } if (!CommonUtil.isEmpty(htmlNode.innerText)) { selected.add(body.getElementsMatchingText(Pattern.quote(htmlNode.innerText))); selected.add(body.getElementsMatchingText("^\\s*" + Pattern.quote(htmlNode.innerText) + "\\s*$")); } if (htmlNode.multiple != null) { selected.add(body.getElementsByAttribute("multiple")); } Map<Element, Integer> votes = new HashMap<Element, Integer>(); for (Elements elements : selected) { for (Element element : elements) { if (!votes.containsKey(element)) { votes.put(element, 0); } votes.put(element, votes.get(element) + 2); if (!NodeUtil.isHidden(element)) { votes.put(element, votes.get(element) + 1); } } } int maxVote = 0; Element maxElement = null; for (Map.Entry<Element, Integer> entry : votes.entrySet()) { if (entry.getValue() > maxVote) { maxVote = entry.getValue(); maxElement = entry.getKey(); } } return toElement(browser, maxElement, htmlNode, recurse); }
From source file:com.cognifide.aet.job.common.comparators.w3chtml5.WarningNodeToW3cHtml5IssueFunction.java
/**
 * Builds a {@link W3cHtml5Issue} from one node.
 * <p>
 * The issue type comes from the element's {@code class} attribute with a leading
 * {@code msg_} removed and the rest upper-cased. The message is the HTML of the
 * descendants whose {@code class} is {@code msg}, and the additional info is the HTML of
 * the element's second child.
 *
 * @param child node to convert
 * @return the issue, or {@code null} when {@code child} is not an {@link Element}
 */
@Override
public W3cHtml5Issue apply(Node child) {
    if (!(child instanceof Element)) {
        return null;
    }
    Element element = (Element) child;

    // Upper-case with Locale.ROOT: the result must match an enum constant name, and the
    // default-locale variant breaks that mapping under locales with special casing rules
    // (e.g. Turkish, where 'i' upper-cases to a dotted capital I).
    String typeName = StringUtils.removeStart(element.attr("class"), "msg_")
            .toUpperCase(java.util.Locale.ROOT);
    W3cHtml5IssueType issueType = W3cHtml5IssueType.valueOf(typeName);

    String message = element.getElementsByAttributeValue("class", "msg").html();
    String additionalInfo = element.child(1).html();
    return new W3cHtml5Issue(0, 0, message, StringUtils.EMPTY, StringUtils.EMPTY, StringUtils.EMPTY,
            additionalInfo, issueType);
}
From source file:ru.neverdark.yotta.parser.YottaParser.java
private void parseTable(Element table, String arrayType) { String array = null;//from ww w . j a va 2 s. c o m if (arrayType.equals("YB-16S3EF8")) { array = table.getElementsByAttributeValue("colspan", "6").get(0).text(); } else if (arrayType.equals("Y3-24S6DF8")) { array = table.getElementsByAttributeValue("colspan", "9").get(0).text(); } else if (arrayType.equals("Y3-16S6SF8p")) { array = table.getElementsByAttributeValue("colspan", "10").get(0).text(); } List<Disk> disks = new ArrayList<Disk>(); Elements trs = table.getElementsByAttributeValue("bgcolor", "FFFFDB"); for (Element tr : trs) { Elements tds = tr.getElementsByTag("td"); String slot = tds.get(0).text(); String usage = tds.get(1).text(); String capacity = tds.get(2).text(); String model = tds.get(3).text(); Disk disk = new Disk(); disk.setSlot(slot); disk.setUsage(usage); disk.setCapacity(capacity); disk.setModel(model); disks.add(disk); } mEnclosuresDisk.put(array, disks); }
From source file:jp.mau.twappremover.MainActivity.java
private void getApps() { _apps.clear();//from w w w .java2 s.c om HttpGet request = new HttpGet(APP_PAGE); request.addHeader("User-Agent", USER_AGENT); request.addHeader("Cookie", "_twitter_sess=" + _session_id + "; auth_token=" + _cookie_auth); try { String result = _client.execute(request, new ResponseHandler<String>() { @Override public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException { switch (response.getStatusLine().getStatusCode()) { case HttpStatus.SC_OK: return EntityUtils.toString(response.getEntity(), "UTF-8"); case HttpStatus.SC_NOT_FOUND: throw new RuntimeException("not found"); default: throw new RuntimeException("error"); } } }); Document doc = null; doc = Jsoup.parse(result); // parse top page and get authenticity token Elements forms = doc.getElementsByTag("form"); for (Element e : forms) { Elements auths = e.getElementsByAttributeValue("name", "authenticity_token"); if (auths.size() > 0) { _auth_token = auths.get(0).attr("value"); break; } } Elements apps = doc.getElementsByClass("app"); for (Element e : apps) { LinkedApp app = new LinkedApp(); if (e.getElementsByTag("strong").size() > 0) app.name = e.getElementsByTag("strong").get(0).text(); if (e.getElementsByClass("creator").size() > 0) app.creator = e.getElementsByClass("creator").get(0).text(); if (e.getElementsByClass("description").size() > 0) app.desc = e.getElementsByClass("description").get(0).text(); if (e.getElementsByClass("app-img").size() > 0) app.imgUrl = e.getElementsByClass("app-img").get(0).attr("src"); if (e.getElementsByClass("revoke").size() > 0) { String tmp = e.getElementsByClass("revoke").get(0).attr("id"); app.revokeId = tmp.replaceAll(KEY_HEADER_REVOKE, ""); } else { // revoke id ????(facebook????????) continue; } _apps.add(app); } _handler.post(new Runnable() { @Override public void run() { _appadapter.notifyDataSetChanged(); } }); } catch (Exception ex) { ex.printStackTrace(); } }
From source file:net.kevxu.purdueassist.course.ScheduleDetail.java
private ScheduleDetailEntry parseDocument(Document document) throws HtmlParseException, CourseNotFoundException, ResultNotMatchException { ScheduleDetailEntry entry = new ScheduleDetailEntry(term, crn); Elements tableElements = document.getElementsByAttributeValue("summary", "This table is used to present the detailed class information."); if (!tableElements.isEmpty()) { for (Element tableElement : tableElements) { // get basic info for selected course Element tableBasicInfoElement = tableElement.getElementsByClass("ddlabel").first(); if (tableBasicInfoElement != null) { setBasicInfo(entry, tableBasicInfoElement.text()); } else { throw new HtmlParseException("Basic info element empty."); }/* ww w.j a v a 2s . c o m*/ // get detailed course info Element tableDetailedInfoElement = tableElement.getElementsByClass("dddefault").first(); if (tableDetailedInfoElement != null) { // process seat info Elements tableSeatDetailElements = tableDetailedInfoElement.getElementsByAttributeValue( "summary", "This layout table is used to present the seating numbers."); if (tableSeatDetailElements.size() == 1) { Element tableSeatDetailElement = tableSeatDetailElements.first(); Elements tableSeatDetailEntryElements = tableSeatDetailElement.getElementsByTag("tbody") .first().children(); if (tableSeatDetailEntryElements.size() == 3 || tableSeatDetailEntryElements.size() == 4) { setSeats(entry, tableSeatDetailEntryElements.get(1).text()); setWaitlistSeats(entry, tableSeatDetailEntryElements.get(2).text()); if (tableSeatDetailEntryElements.size() == 4) { setCrosslistSeats(entry, tableSeatDetailEntryElements.get(3).text()); } } else { throw new HtmlParseException("Seat detail entry elements size not 3. We have " + tableSeatDetailEntryElements.size() + "."); } } else { throw new HtmlParseException( "Seat detail elements size not 1. 
We have " + tableSeatDetailElements.size() + "."); } // remove the seat info from detailed info tableSeatDetailElements.remove(); // remaining information setRemainingInfo(entry, tableDetailedInfoElement.html()); } else { throw new HtmlParseException("Detailed info element empty."); } } } else { // test empty Elements informationElements = document.getElementsByAttributeValue("summary", "This layout table holds message information"); if (!informationElements.isEmpty() && informationElements.text().contains("No detailed class information found")) { throw new CourseNotFoundException(informationElements.text()); } else { throw new HtmlParseException( "Course table not found, but page does not contain message stating no course found."); } } return entry; }
From source file:com.serphacker.serposcope.scraper.google.scraper.GoogleScraper.java
protected Status handleCaptchaRedirect(String captchaRedirect) { if (captchaRedirect == null || !captchaRedirect.contains("?continue=")) { return Status.ERROR_NETWORK; }//from w w w . j a v a 2s. co m LOG.debug("captcha form detected via {}", http.getProxy() == null ? new DirectNoProxy() : http.getProxy()); int status = http.get(captchaRedirect); if (status == 403) { return Status.ERROR_IP_BANNED; } if (solver == null) { return Status.ERROR_CAPTCHA_NO_SOLVER; } String content = http.getContentAsString(); if (content == null) { return Status.ERROR_NETWORK; } String imageSrc = null; Document captchaDocument = Jsoup.parse(content, captchaRedirect); Elements elements = captchaDocument.getElementsByTag("img"); for (Element element : elements) { String src = element.attr("abs:src"); if (src != null && src.contains("/sorry/image")) { imageSrc = src; } } if (imageSrc == null) { LOG.debug("can't find captcha img tag"); return Status.ERROR_NETWORK; } Element form = captchaDocument.getElementsByTag("form").first(); if (form == null) { LOG.debug("can't find captcha form"); return Status.ERROR_NETWORK; } String continueValue = null; String formIdValue = null; String formUrl = form.attr("abs:action"); String formQValue = null; Element elementCaptchaId = form.getElementsByAttributeValue("name", "id").first(); if (elementCaptchaId != null) { formIdValue = elementCaptchaId.attr("value"); } Element elementContinue = form.getElementsByAttributeValue("name", "continue").first(); if (elementContinue != null) { continueValue = elementContinue.attr("value"); } Element elementQ = form.getElementsByAttributeValue("name", "q").first(); if (elementQ != null) { formQValue = elementQ.attr("value"); } if (formUrl == null || (formIdValue == null && formQValue == null) || continueValue == null) { LOG.debug("invalid captcha form"); return Status.ERROR_NETWORK; } int imgStatus = http.get(imageSrc, captchaRedirect); if (imgStatus != 200 || http.getContent() == null) { LOG.debug("can't download 
captcha image {} (status code = {})", imageSrc, imgStatus); return Status.ERROR_NETWORK; } CaptchaImage captcha = new CaptchaImage(new byte[][] { http.getContent() }); boolean solved = solver.solve(captcha); if (!solved || !Captcha.Status.SOLVED.equals(captcha.getStatus())) { LOG.error("solver can't resolve captcha (overload ?) error = {}", captcha.getError()); return Status.ERROR_CAPTCHA_INCORRECT; } LOG.debug("got captcha response {} in {} seconds from {}", captcha.getResponse(), captcha.getSolveDuration() / 1000l, (captcha.getLastSolver() == null ? "?" : captcha.getLastSolver().getFriendlyName())); try { formUrl += "?continue=" + URLEncoder.encode(continueValue, "utf-8"); } catch (Exception ex) { } formUrl += "&captcha=" + captcha.getResponse(); if (formIdValue != null) { formUrl += "&id=" + formIdValue; } if (formQValue != null) { formUrl += "&q=" + formQValue; } int postCaptchaStatus = http.get(formUrl, captchaRedirect); if (postCaptchaStatus == 302) { String redirectOnSuccess = http.getResponseHeader("location"); if (redirectOnSuccess.startsWith("http://")) { redirectOnSuccess = "https://" + redirectOnSuccess.substring(7); } int redirect1status = http.get(redirectOnSuccess, captchaRedirect); if (redirect1status == 200) { return Status.OK; } if (redirect1status == 302) { if (http.get(http.getResponseHeader("location"), captchaRedirect) == 200) { return Status.OK; } } } if (postCaptchaStatus == 503) { LOG.debug("reporting incorrect captcha (incorrect response = {})", captcha.getResponse()); solver.reportIncorrect(captcha); } return Status.ERROR_CAPTCHA_INCORRECT; }
From source file:com.screenslicer.core.util.Util.java
public static WebElement toElement(RemoteWebDriver driver, HtmlNode htmlNode, Element body) throws ActionFailed { if (body == null) { body = Util.openElement(driver, null, null, null); }//from w w w .j a va 2 s. com if (!CommonUtil.isEmpty(htmlNode.id)) { WebElement element = toElement(driver, body.getElementById(htmlNode.id)); if (element != null) { return element; } } List<Elements> selected = new ArrayList<Elements>(); if (!CommonUtil.isEmpty(htmlNode.tagName)) { selected.add(body.getElementsByTag(htmlNode.tagName)); } else if (!CommonUtil.isEmpty(htmlNode.href)) { selected.add(body.getElementsByTag("a")); } if (!CommonUtil.isEmpty(htmlNode.name)) { selected.add(body.getElementsByAttributeValue("name", htmlNode.name)); } if (!CommonUtil.isEmpty(htmlNode.type)) { selected.add(body.getElementsByAttributeValue("type", htmlNode.type)); } if (!CommonUtil.isEmpty(htmlNode.value)) { selected.add(body.getElementsByAttributeValue("value", htmlNode.value)); } if (!CommonUtil.isEmpty(htmlNode.title)) { selected.add(body.getElementsByAttributeValue("title", htmlNode.title)); } if (htmlNode.classes != null && htmlNode.classes.length > 0) { Map<Element, Integer> found = new HashMap<Element, Integer>(); for (int i = 0; i < htmlNode.classes.length; i++) { Elements elements = body.getElementsByClass(htmlNode.classes[i]); for (Element element : elements) { if (!found.containsKey(element)) { found.put(element, 0); } found.put(element, found.get(element) + 1); } } Elements elements = new Elements(); for (int i = htmlNode.classes.length; i > 0; i--) { for (Map.Entry<Element, Integer> entry : found.entrySet()) { if (entry.getValue() == i) { elements.add(entry.getKey()); } } if (!elements.isEmpty()) { break; } } selected.add(elements); } if (!CommonUtil.isEmpty(htmlNode.href)) { Elements hrefs = body.getElementsByAttribute("href"); Elements toAdd = new Elements(); String currentUrl = driver.getCurrentUrl(); String hrefGiven = htmlNode.href; for (Element href : hrefs) { String 
hrefFound = href.attr("href"); if (hrefGiven.equalsIgnoreCase(hrefFound)) { toAdd.add(href); } else { String uriGiven = Util.toCanonicalUri(currentUrl, hrefGiven); String uriFound = Util.toCanonicalUri(currentUrl, hrefFound); if (uriGiven.equalsIgnoreCase(uriFound)) { toAdd.add(href); } } } selected.add(toAdd); } if (!CommonUtil.isEmpty(htmlNode.innerText)) { selected.add(body.getElementsMatchingText(Pattern.quote(htmlNode.innerText))); } if (htmlNode.multiple != null) { selected.add(body.getElementsByAttribute("multiple")); } Map<Element, Integer> votes = new HashMap<Element, Integer>(); for (Elements elements : selected) { for (Element element : elements) { if (!Util.isHidden(element)) { if (!votes.containsKey(element)) { votes.put(element, 0); } votes.put(element, votes.get(element) + 1); } } } int maxVote = 0; Element maxElement = null; for (Map.Entry<Element, Integer> entry : votes.entrySet()) { if (entry.getValue() > maxVote) { maxVote = entry.getValue(); maxElement = entry.getKey(); } } return toElement(driver, maxElement); }