List of usage examples for org.jsoup.nodes.Document#select
public Elements select(String cssQuery)
From source file:com.amastigote.xdu.query.module.EduSystem.java
private @Nullable JSONObject lessonsQuery() throws IOException, JSONException { if (!checkIsLogin(ID)) return null; URL url = new URL(SYS_HOST + "xkAction.do?actionType=6"); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID); httpURLConnection.connect();/* ww w .j av a2s . co m*/ Document document = Jsoup.parse(httpURLConnection.getInputStream(), "gb2312", httpURLConnection.getURL().toString()); document = Jsoup.parse(document.toString().replaceAll(" ", "")); Elements lessons = document.select("table[class=titleTop2]"); Element lessonsElement = lessons.get(1); Elements lessonsInfo = lessonsElement.select("tr[onmouseout=this.className='even';]"); int lessons_quantity = lessonsInfo.size(); JSONArray jsonArray = new JSONArray(); for (int i = 0; i < lessons_quantity;) { Element lessonInfo = lessonsInfo.get(i); Elements lessonDetails = lessonInfo.select("td"); // if (lessonDetails.get(14).text().equals("")) { i++; continue; } JSONObject JLessonObject = new JSONObject(); JLessonObject.put(CourseKey.ID, lessonDetails.get(1).text()); JLessonObject.put(CourseKey.NAME, lessonDetails.get(2).text()); JLessonObject.put(CourseKey.CREDIT, lessonDetails.get(4).text()); JLessonObject.put(CourseKey.LENGTH, lessonDetails.get(5).text()); JLessonObject.put(CourseKey.ATTR, lessonDetails.get(6).text()); JLessonObject.put(CourseKey.EXAM_TYPE, lessonDetails.get(7).text()); JLessonObject.put(CourseKey.TEACHER, lessonDetails.get(8).text()); JSONArray JLessonTimeAndPosArray = new JSONArray(); JSONObject JLessonTimeAndPos = new JSONObject(); JLessonTimeAndPos.put(CourseKey.WEEK, lessonDetails.get(12).text()); JLessonTimeAndPos.put(CourseKey.WEEK_DAY, lessonDetails.get(13).text()); JLessonTimeAndPos.put(CourseKey.SECTION_TIME, lessonDetails.get(14).text()); JLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, lessonDetails.get(15).text()); JLessonTimeAndPos.put(CourseKey.CAMPUS, 
lessonDetails.get(16).text()); JLessonTimeAndPos.put(CourseKey.BUILDING, lessonDetails.get(17).text()); JLessonTimeAndPos.put(CourseKey.CLASSROOM, lessonDetails.get(18).text()); JLessonTimeAndPosArray.put(JLessonTimeAndPos); i++; //??Array int row_span; //row_span?1 if ("".equals(lessonInfo.select("td").get(0).attr("rowspan"))) { row_span = 1; } else { row_span = Integer.parseInt(lessonInfo.select("td").get(0).attr("rowspan")); } //row_span?1?? for (int j = 0; j < row_span - 1; j++, i++) { Elements EExtraTimeAndPos = lessonsInfo.get(i).select("td"); JSONObject JExtraLessonTimeAndPos = new JSONObject(); JExtraLessonTimeAndPos.put(CourseKey.WEEK, EExtraTimeAndPos.get(0).text()); JExtraLessonTimeAndPos.put(CourseKey.WEEK_DAY, EExtraTimeAndPos.get(1).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_TIME, EExtraTimeAndPos.get(2).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, EExtraTimeAndPos.get(3).text()); JExtraLessonTimeAndPos.put(CourseKey.CAMPUS, EExtraTimeAndPos.get(4).text()); JExtraLessonTimeAndPos.put(CourseKey.BUILDING, EExtraTimeAndPos.get(5).text()); JExtraLessonTimeAndPos.put(CourseKey.CLASSROOM, EExtraTimeAndPos.get(6).text()); JLessonTimeAndPosArray.put(JExtraLessonTimeAndPos); } JLessonObject.put(CourseKey.TIME_AND_LOCATION_DERAIL, JLessonTimeAndPosArray); jsonArray.put(JLessonObject); } return new JSONObject().put("ARRAY", jsonArray); }
From source file:com.muzima.view.forms.HTMLFormWebViewActivity.java
/**
 * Returns the form template HTML, with the stored JSON payload injected when there is one.
 *
 * <p>Without a payload the template HTML is returned untouched. Otherwise the payload is wrapped
 * in a {@code <div id='pre_populate_data'>} that is prepended to the document body, and the
 * serialized document is returned.
 *
 * @return the HTML to load for this form
 */
private String prePopulateData() {
    if (formData.getJsonPayload() != null) {
        Document template = Jsoup.parse(formTemplate.getHtml());
        String payload = formData.getJsonPayload();
        String payloadHolder = "<div id='pre_populate_data'>" + payload + "</div>";
        template.select("body").prepend(payloadHolder);
        return template.toString();
    }
    return formTemplate.getHtml();
}
From source file:com.amastigote.xdu.query.module.EduSystem.java
/**
 * Downloads the grade page ({@code SYS_HOST + GRADE_QUERY_SUFFIX}) and converts it to JSON.
 *
 * <p>The i-th {@code <b>} element supplies the key of a semester and is paired with the i-th
 * {@code td[class=pageAlign]} element. Inside that element the {@code td[align=center]} cells
 * are consumed in groups of seven: cell 0 is stored as the id, cell 2 is used as the course key,
 * and cells 4, 5 and 6 are stored as credit, attribute and grade.
 *
 * @return a JSON object mapping semester key to an object mapping course key to course details,
 *         or {@code null} when {@code checkIsLogin(ID)} reports false
 * @throws IOException   if the page cannot be fetched or read
 * @throws JSONException if a value cannot be stored in the JSON structure
 */
private @Nullable JSONObject gradesQuery() throws IOException, JSONException {
    if (!checkIsLogin(ID)) {
        return null;
    }

    URL url = new URL(SYS_HOST + GRADE_QUERY_SUFFIX);
    HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
    Document document;
    try {
        httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID);
        httpURLConnection.connect();
        document = Jsoup.parse(httpURLConnection.getInputStream(), "gb2312",
                httpURLConnection.getURL().toString());
    } finally {
        // Release the connection even when reading or parsing fails.
        httpURLConnection.disconnect();
    }

    // Strip non-breaking-space entities. Removing plain spaces instead would glue tag names to
    // their attributes and break the attribute selectors below.
    document = Jsoup.parse(document.toString().replaceAll("&nbsp;", ""));

    JSONObject jsonObject = new JSONObject();
    Elements semesterTables = document.select("td[class=pageAlign]");
    Elements semesterTitles = document.select("b");

    for (int i = 0; i < semesterTitles.size(); i++) {
        String semesterKey = semesterTitles.get(i).text().trim();
        Elements cells = semesterTables.get(i).select("td[align=center]");

        JSONObject semester = new JSONObject();
        // Each course occupies seven consecutive cells; a trailing partial group is ignored.
        for (int j = 0; j < cells.size() / 7; j++) {
            int base = j * 7;
            String courseKey = cells.get(base + 2).text().trim();

            JSONObject course = new JSONObject();
            course.put(GradeKey.ID, cells.get(base).text().trim());
            course.put(GradeKey.CREDIT, cells.get(base + 4).text().trim());
            course.put(GradeKey.ATTR, cells.get(base + 5).text().trim());
            course.put(GradeKey.GRADE, cells.get(base + 6).text().trim());
            semester.put(courseKey, course);
        }
        jsonObject.put(semesterKey, semester);
    }
    return jsonObject;
}
From source file:abelymiguel.miralaprima.GetPrima.java
private HashMap<String, Float> getPrimaDataDMacro(String country_code, String providerUrl, String indexName) throws IOException { HashMap<String, Float> respuestaJson = new HashMap<String, Float>(); HashMap<String, Object> primaJson; Float prima_value;/*from ww w . ja v a 2 s . c o m*/ Float prima_delta; Float prima_percent; Document doc; doc = Jsoup.connect(providerUrl + indexName).get(); try { Element riskPremium = doc.select(".numero").first(); // System.out.println("Prima: " + riskPremium.text()); prima_value = Float.valueOf(riskPremium.text().replace(".", "")).floatValue(); Element riskDelta = doc.select(".text-success").first(); String deltaStr = riskDelta.text().substring(riskDelta.text().indexOf(">") + 1); prima_delta = Float.valueOf(deltaStr).floatValue(); // System.out.println("Trending delta: " + prima_delta); String percentStr; prima_percent = 100 * prima_delta / (prima_value - prima_delta); DecimalFormat df = new DecimalFormat("0.00"); percentStr = df.format(prima_percent); prima_percent = Float.valueOf(percentStr).floatValue(); // System.out.println("Trending prima_percent: " + prima_percent); respuestaJson.put("prima_value", prima_value); respuestaJson.put("prima_delta", prima_delta); respuestaJson.put("prima_percent", prima_percent); if (isSameDay(country_code)) { this.updatePrimaInDB(prima_value, prima_delta, prima_percent, this.getLatestPrimaIdFromDB(country_code)); } else { this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code); } } catch (Exception ex) { Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex); primaJson = getLatestPrimaFromDB(country_code); respuestaJson.put("prima_value", (Float) primaJson.get("prima_value")); respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta")); respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent")); } return respuestaJson; }
From source file:se.vgregion.portal.iframe.controller.CSViewController.java
private Map<String, String> lookupDynamicValue(PortletConfig portletConfig) { Map<String, String> dynamicFieldValueMap = new HashMap<String, String>(); try {//from w w w .j a v a2s. c o m String dynamicFieldString = portletConfig.getDynamicField(); String[] dynamicFields = dynamicFieldString.replace(" ", "").split(","); String dynamicValue; for (String dynamicField : dynamicFields) { Document doc = getDynamicFieldsDocument(portletConfig); dynamicValue = doc.select("body").get(0).getElementsByAttributeValue("name", dynamicField).get(0) .attr("value").replaceAll("\n\r", "").replaceAll("\r", "").replaceAll("\n", ""); if (dynamicValue.contains("<>")) { throw new Exception("Invalid value format [" + dynamicValue + "]"); } dynamicFieldValueMap.put(dynamicField, dynamicValue); } if (portletConfig.isRdEncode()) { addSpecialFieldForRaindance(dynamicFieldValueMap, portletConfig); } return dynamicFieldValueMap; } catch (Exception e) { LOGGER.error(e.getMessage(), e); return Collections.emptyMap(); } finally { // Remove the ThreadLocal so we don't reuse it in subsequent request which is in a reused thread. dynamicFieldsDocThreadLocal.remove(); } }
From source file:me.vertretungsplan.parser.DSBMobileParser.java
private void loadScheduleFromUrl(SubstitutionSchedule v, String url, List<String> usedUrls) throws IOException, JSONException, CredentialInvalidException, IncompatibleScheduleException { usedUrls.add(url);// w w w .jav a2s . c o m String html = httpGet(url, data.has(PARAM_ENCODING) ? data.optString(PARAM_ENCODING, null) : "UTF-8"); Document doc = Jsoup.parse(html); if (doc.title().toLowerCase().contains("untis") || doc.html().toLowerCase().contains("untis") || data.optString(PARAM_TYPE, "").equals("untis")) { parseMultipleMonitorDays(v, doc, data); } else if (doc.html().toLowerCase().contains("created by davinci") || data.optString(PARAM_TYPE, "").equals("davinci")) { Elements titles = doc.select("h2"); Elements tables = doc.select("h2 + p + table"); if (titles.size() != tables.size()) throw new IOException("Anzahl berschriften != Anzahl Tabellen"); for (int i = 0; i < titles.size(); i++) { SubstitutionScheduleDay day = new SubstitutionScheduleDay(); String date = titles.get(i).text(); day.setDateString(date); day.setDate(ParserUtils.parseDate(date)); DaVinciParser.parseDaVinciTable(tables.get(i), v, day, colorProvider); v.addDay(day); } } else if (doc.select(".tdaktionen").size() > 0 || data.optString(PARAM_TYPE, "").equals("indiware")) { new IndiwareParser(scheduleData, cookieProvider).parseIndiwarePage(v, doc.html()); } else if (doc.text().matches(".*Fr diesen Bereich.*wurde kein Inhalt bereitgestellt\\.")) { return; } else { throw new IncompatibleScheduleException(); } if (doc.select("meta[http-equiv=refresh]").size() > 0) { Element meta = doc.select("meta[http-equiv=refresh]").first(); String attr = meta.attr("content").toLowerCase(); String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1) + attr.substring(attr.indexOf("url=") + 4); if (!usedUrls.contains(redirectUrl)) { loadScheduleFromUrl(v, redirectUrl, usedUrls); } } }
From source file:gui.InboxPanel.java
/**
 * Returns the body HTML of the text pane with every {@code <ds>} element removed.
 *
 * <p>The resulting HTML is also printed to standard output.
 *
 * @return inner HTML of the parsed body without {@code <ds>} elements
 */
private String getPlain() {
    Document parsed = Jsoup.parseBodyFragment(BodyTextPane.getText());
    parsed.select("ds").remove();

    String bodyHtml = parsed.select("body").html();
    System.out.println("DS nya diilangin : \n" + bodyHtml);
    return bodyHtml;
}
From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java
private void setFilmCover(Document doc, Film film) { Elements coverElements = doc.select(".fm-intro img[src]"); if (CollectionUtils.isNotEmpty(coverElements)) { Element coverElement = coverElements.get(0); String coverUrl = coverElement.attr("src").toString(); if (StringUtils.isNotBlank(coverUrl)) { film.setCoverUrl(coverUrl);//from w ww . ja va 2 s .c o m } } }
From source file:gui.InboxPanel.java
private String getSign() { String html = BodyTextPane.getText(); Document doc = Jsoup.parseBodyFragment(html); if (doc.select("ds") != null) { //System.out.println("Signature : \n" + doc.select("ds").text()); return doc.select("ds").text(); }/*from w w w . j a v a2s . c o m*/ return null; }
From source file:com.fluidops.iwb.provider.HTMLProvider.java
@Override public void gather(List<Statement> res) throws Exception { String url = config.url;/* w w w. j a va 2 s . c om*/ Document doc = Jsoup.connect(url).get(); Elements links = doc.select("a[href]"); Elements media = doc.select("[src]"); Elements imports = doc.select("link[href]"); // Elements article = // doc.select("div.wrapper").select("div.box-shadow").select("div#content.cols").select("div.cl").select("div.crm").select("article").select("section.article").select("div.textblock").select("table"); Elements article = doc.getElementsByTag("tbody").select("tr"); Elements tableElem; URI nameURI = null; URI roadsURI = null; URI sideURI = null; URI totalURI = null; File file = new File("HTMLdata.txt"); PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(file))); out.println("Media"); print("\nMedia: (%d)", media.size()); for (Element el : media) { if (el.tagName().equals("img")) { print(" * %s: <%s> %sx%s (%s)", el.tagName(), el.attr("abs:src"), el.attr("width"), el.attr("height"), trim(el.attr("alt"), 20)); out.printf(" \n * %s: <%s> %sx%s (%s)", el.tagName(), el.attr("abs:src"), el.attr("width"), el.attr("height"), trim(el.attr("alt"), 20)); out.println(); } else { print(" * %s: <%s>", el.tagName(), el.attr("abs:src")); out.printf(" \n * %s: <%s>", el.tagName(), el.attr("abs:src")); out.println(); } } out.println("Imports"); print("\nImports: (%d)", imports.size()); for (Element link : imports) { print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel")); out.printf(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel")); out.println(); } out.println("Links"); print("\nLinks: (%d)", links.size()); for (Element link : links) { print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35)); out.printf(" * a: <%s> (%s)", link.attr("abs:href"), link.text()); out.println(); } /* * out.println("Custom text"); print("\nCustom: (%d)",customArt.size()); * for (Element custom:customArt){ * out.printf(" * a 
(%s): (%s)",custom.tagName(),custom.text()); * out.println(); } */ out.println("Article"); print("\nArticle: (%d)", article.size()); for (int i = 3; i < article.size() - 2; i++) { tableElem = article.get(i).select("td"); out.println(); if (i == 3) { nameURI = ProviderUtils.objectToUri(tableElem.get(0).text()); roadsURI = ProviderUtils.objectToUri(tableElem.get(1).text()); sideURI = ProviderUtils.objectToUri(tableElem.get(2).text()); totalURI = ProviderUtils.objectToUri(tableElem.get(3).text()); } else { res.add(ProviderUtils.createStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), RDF.TYPE, nameURI)); res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), RDFS.LABEL, tableElem.get(0).text())); res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), roadsURI, tableElem.get(1).text())); res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), sideURI, tableElem.get(2).text())); res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), totalURI, tableElem.get(3).text())); for (Element el : tableElem) { out.printf("\n * (%s): (%s)", el.tagName(), el.text()); out.println(); } } out.println(); out.printf("\n * a (%s) (%d): (%s)", article.get(i).tagName(), tableElem.size(), article.get(i).text()); out.println(); } out.close(); }