List of usage examples for the method org.jsoup.nodes.Element.hasClass
public boolean hasClass(String className)
From source file:controllers.FRBProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override//from w w w.ja va 2 s. c om public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } F.Promise<WSResponse> wsResponsePromise = WS.url("http://www.forbes.com/search/") .setQueryParameter("q", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Insert into map org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("li.edittools-contentitem"); // All articles belong to this class for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Check if specific article belongs to gallery class (therefore it contains an image) if (item.hasClass("gallery")) { // Add image key and value to map keyValue.put("image", item.select("img").attr("src")); } // Add the rest of keys and values keyValue.put("title", item.select("h2").select("a").text()); keyValue.put("content", item.select("p").first().ownText()); keyValue.put("date", item.select("time").text()); keyValue.put("url", item.select("h2").select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:controllers.KWProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override/*from w w w.jav a2 s . c o m*/ public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } F.Promise<WSResponse> wsResponsePromise = WS.url("http://knowledge.wharton.upenn.edu/") .setQueryParameter("s", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Insert into map org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("div.article.type-article.status-publish"); // All articles belong to this classes for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Check if specific article belongs to "has-post-thumbnail" class (therefore it contains an image) if (item.hasClass("has-post-thumbnail")) { // Add image key and value to map keyValue.put("image", item.select("img").attr("src")); } // Add the rest of keys and values keyValue.put("title", item.select("h2").select("a").text()); keyValue.put("content", item.select("div.attribute.categorythumbs").first().text()); keyValue.put("date", item.select("ul.datestamp").select("li").first().text()); keyValue.put("url", item.select("h2").select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:io.jari.geenstijl.API.API.java
private static Artikel parseArtikel(Element artikel_el, Context context) throws ParseException { Artikel artikel = new Artikel(); //id//from w w w . ja v a 2 s . co m artikel.id = Integer.parseInt(artikel_el.attr("id").substring(1)); //summary artikel.summary = artikel_el.select("a.more").first() != null; //titel artikel.titel = artikel_el.select("h1").text(); //plaatje if (PreferenceManager.getDefaultSharedPreferences(context).getBoolean("show_images", true)) { Element plaatje = artikel_el.select("img").first(); if (plaatje != null) { try { String url = plaatje.attr("src"); Log.d(TAG, "Downloading " + url); // artikel.plaatje = Drawable.createFromStream(((java.io.InputStream)new URL(plaatje.attr("src")).getContent()), null); artikel.plaatje = readBytes((InputStream) new URL(plaatje.attr("src")).getContent()); artikel.groot_plaatje = plaatje.hasClass("groot"); if (plaatje.hasAttr("width") && plaatje.hasAttr("height")) if (!plaatje.attr("width").equals("100") || !plaatje.attr("height").equals("100")) artikel.groot_plaatje = true; if (artikel.groot_plaatje) Log.i(TAG, " Done. Big image."); else Log.i(TAG, " Done."); } catch (Exception ex) { Log.w(TAG, "Unable to download image, Falling back... 
Reason: " + ex.getMessage()); artikel.plaatje = null; } } } //embed if (artikel_el.select("div.embed").first() != null) { //atm alleen support voor iframes Element frame = artikel_el.select("div.embed>iframe").first(); if (frame != null) artikel.embed = frame.attr("src"); } //embed (geenstijl.tv) if (!domain.equals("www.geenstijl.nl")) { //extract url from script Element scriptEl = artikel_el.select("script").first(); if (scriptEl != null) { String script = scriptEl.html(); Pattern pattern = Pattern.compile("'(.*)', fall"); Matcher matcher = pattern.matcher(script); if (matcher.find() && matcher.groupCount() == 1) { artikel.embed = matcher.group(1); } } } //footer shit Element footer = artikel_el.select("footer").first(); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm", Locale.US); artikel.datum = simpleDateFormat.parse(footer.select("time").first().attr("datetime")); StringTokenizer footer_items = new StringTokenizer(footer.text(), "|"); artikel.auteur = footer_items.nextToken().trim(); artikel.reacties = Integer.parseInt(footer.select("a.comments").text().replace(" reacties", "")); artikel.link = footer.select("a").first().attr("href"); //clean up artikel_el.select("h1").remove(); artikel_el.select(".embed").remove(); artikel_el.select("img").remove(); artikel_el.select("footer").remove(); artikel_el.select("a.more").remove(); artikel_el.select("script").remove(); //inhoud artikel.inhoud = artikel_el.html(); return artikel; }
From source file:coding.cowboys.scrapers.DvcMagicResalesScraper.java
public List<ResortWrapper> findResorts() { List<ResortWrapper> wrappers = new ArrayList<ResortWrapper>(); Document doc = null;//w w w. j a v a 2s . co m try { doc = Jsoup.connect(SiteUrls.DVC_MAGIC_RESALES).timeout(60000).get(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (doc != null) { for (Element element : doc.select("table#listALL")) { for (Element row : element.select("tr")) { if (row.hasClass("stat-Active")) { ResortWrapper wrapper = new ResortWrapper(); Elements data = row.select("td"); wrapper.setResort(getResortFromText(data.get(0).text())); wrapper.setUseYear(data.get(1).text()); wrapper.setPoints(data.get(2).text()); wrapper.setPricePerPoint(data.get(3).text()); wrapper.setTotalPrice(data.get(4).text()); wrapper.setPointSummary(data.get(0).text().replace(wrapper.getResort(), "")); wrapper.setUrl("http://www.dvcmagicresales.com/dvcmr/resales-all-listings/"); wrappers.add(wrapper); } } } } else { System.out.println("DVC Magic Resales returned null"); } return wrappers; }
From source file:com.liato.bankdroid.banking.banks.MinPension.java
private Account updateAccount(String URL, String selector, String name) throws IOException { String response = urlopen.open(URL); Document dResponse = Jsoup.parse(response); List<Transaction> transactions = new ArrayList<>(); String institute = ""; String subInstitute = ""; for (Element e : dResponse.select(selector)) { if (e.hasClass("GroupRow")) { institute = e.children().first().text(); } else if (e.hasClass("GroupMemberRow") || e.hasClass("SubRow")) { Elements elements = e.children(); if (elements.size() == 6) { //Special case for "Allmn pension" if (elements.get(2).text().isEmpty()) { // subInstitute = " " + elements.get(1).text(); /* Doesn't fit atm. */ } else { transactions.add(new Transaction(elements.get(5).text(), institute + subInstitute + "\n " + elements.get(1).text(), Helpers.parseBalance(elements.get(2).text()))); subInstitute = ""; }/*ww w . ja v a 2s.co m*/ } else if (elements.size() >= 7) { transactions.add( new Transaction(elements.get(6).text(), institute + "\n " + elements.get(1).text(), Helpers.parseBalance(elements.get(4).text()))); } } } balance = BigDecimal.ZERO; for (Transaction t : transactions) { balance = balance.add(t.getAmount()); } Account account = new Account(name, balance, name, Account.REGULAR, ""); account.setTransactions(transactions); return account; }
From source file:com.johan.vertretungsplan.parser.SVPlanParser.java
public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); // JSONArray urls = schule.getData().getJSONArray("urls"); String encoding = schule.getData().getString("encoding"); List<Document> docs = new ArrayList<Document>(); for (int i = 0; i < urls.length(); i++) { JSONObject url = urls.getJSONObject(i); loadUrl(url.getString("url"), encoding, docs); }/*from w w w. j a va2 s .co m*/ LinkedHashMap<String, VertretungsplanTag> tage = new LinkedHashMap<String, VertretungsplanTag>(); for (Document doc : docs) { if (doc.select(".svp-tabelle").size() > 0) { VertretungsplanTag tag = new VertretungsplanTag(); String date = "Unbekanntes Datum"; if (doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").size() > 0) date = doc.select(".svp-plandatum-heute, .svp-plandatum-morgen").text(); else if (doc.title().startsWith("Vertretungsplan fr ")) date = doc.title().substring("Vertretungsplan fr ".length()); tag.setDatum(date); if (doc.select(".svp-uploaddatum").size() > 0) tag.setStand(doc.select(".svp-uploaddatum").text().replace("Aktualisierung: ", "")); Elements rows = doc.select(".svp-tabelle tr"); String lastLesson = ""; for (Element row : rows) { if (row.hasClass("svp-header")) continue; Vertretung vertretung = new Vertretung(); List<String> affectedClasses = new ArrayList<String>(); for (Element column : row.select("td")) { if (!hasData(column.text())) { continue; } String type = column.className(); if (type.startsWith("svp-stunde")) { vertretung.setLesson(column.text()); lastLesson = column.text(); } else if (type.startsWith("svp-klasse")) affectedClasses = Arrays.asList(column.text().split(", ")); else if (type.startsWith("svp-esfehlt")) vertretung.setPreviousTeacher(column.text()); else if (type.startsWith("svp-esvertritt")) vertretung.setTeacher(column.text()); else if (type.startsWith("svp-fach")) vertretung.setSubject(column.text()); else if 
(type.startsWith("svp-bemerkung")) { vertretung.setDesc(column.text()); vertretung.setType(recognizeType(column.text())); } else if (type.startsWith("svp-raum")) vertretung.setRoom(column.text()); if (vertretung.getLesson() == null) vertretung.setLesson(lastLesson); } if (vertretung.getType() == null) { vertretung.setType("Vertretung"); } for (String klasse : affectedClasses) { KlassenVertretungsplan kv = tag.getKlassen().get(klasse); if (kv == null) kv = new KlassenVertretungsplan(klasse); kv.add(vertretung); tag.getKlassen().put(klasse, kv); } } List<String> nachrichten = new ArrayList<String>(); if (doc.select("h2:contains(Mitteilungen)").size() > 0) { Element h2 = doc.select("h2:contains(Mitteilungen)").first(); Element sibling = h2.nextElementSibling(); while (sibling != null && sibling.tagName().equals("p")) { for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText() .split("<br />\\s*<br />")) { if (hasData(nachricht)) nachrichten.add(nachricht); } sibling = sibling.nextElementSibling(); } } tag.setNachrichten(nachrichten); tage.put(date, tag); } else { throw new IOException("keine SVPlan-Tabelle gefunden"); } } Vertretungsplan v = new Vertretungsplan(); v.setTage(new ArrayList<VertretungsplanTag>(tage.values())); return v; }
From source file:eu.masconsult.bgbanking.banks.dskbank.DskClient.java
private RawBankAccount obtainBankAccountFromHtmlTableRow(Element row) { // skip title rows if (row.children().size() != 4) { return null; }/*from www.j a v a 2 s. c om*/ // skip header if (row.hasClass("td-header")) { return null; } String onclick = row.child(0).child(0).attr("onclick"); Matcher matcher = PATTERN_MATCH_BANK_ACCOUNT_ID.matcher(onclick); if (!matcher.find()) { throw new ParseException("can't find bank account id in " + onclick); } return new RawBankAccount().setServerId(matcher.group(1)).setName(row.child(0).text()) .setIBAN(row.child(1).text()).setCurrency(row.child(2).text()) .setBalance(Convert.strToFloat(row.child(3).text())) .setAvailableBalance(Convert.strToFloat(row.child(3).text())); }
From source file:me.vertretungsplan.parser.SVPlanParser.java
/**
 * Parses one day of an SVPlan page and adds it to the schedule.
 *
 * <p>Table cells are identified by the prefix of their class attribute; both the
 * {@code svp-*} names and the capitalised German names (e.g. {@code Stunde},
 * {@code Klasse}) are recognised. Empty lesson and class cells inherit the value of the
 * previous row.
 *
 * @param v   the schedule that receives the parsed day
 * @param svp the element containing this day's table and messages
 * @param doc the whole document, used for the title fallback and the header check
 * @throws IOException if neither a date element nor a matching page title is found
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    // Page titles read "Vertretungsplan für <date>". The umlaut is written as a Unicode
    // escape so the match does not depend on the encoding of this source file.
    boolean hasDate = svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").size() > 0
            || doc.title().startsWith("Vertretungsplan f\u00fcr ");
    if (!hasDate) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    setDate(svp, doc, day);

    if (svp.select(".svp-tabelle, table:has(.Klasse)").size() > 0) {
        Elements rows = svp.select(".svp-tabelle tr, table:has(.Klasse) tr");

        // Neither value changes while iterating, so look them up once.
        boolean docUsesHeaderClass = doc.select(".svp-header").size() > 0;
        String classSeparator = data.optString(PARAM_CLASS_SEPARATOR, ", ");

        String lastLesson = "";
        String lastClass = "";
        for (Element row : rows) {
            // Skip header rows and rows without any text.
            if ((docUsesHeaderClass && row.hasClass("svp-header"))
                    || row.select("th").size() > 0
                    || row.text().trim().equals("")) {
                continue;
            }

            Substitution substitution = new Substitution();

            for (Element column : row.select("td")) {
                String type = column.className();

                if (!hasData(column.text())) {
                    // An empty lesson or class cell repeats the previous row's value.
                    if ((type.startsWith("svp-stunde") || type.startsWith("Stunde"))
                            && hasData(lastLesson)) {
                        substitution.setLesson(lastLesson);
                    } else if ((type.startsWith("svp-klasse") || type.startsWith("Klasse"))
                            && hasData(lastClass)) {
                        substitution.getClasses().addAll(Arrays.asList(lastClass.split(classSeparator)));
                    }
                    continue;
                }

                if (type.startsWith("svp-stunde") || type.startsWith("Stunde")) {
                    substitution.setLesson(column.text());
                    lastLesson = column.text();
                } else if (type.startsWith("svp-klasse") || type.startsWith("Klasse")) {
                    substitution.getClasses().addAll(Arrays.asList(column.text().split(classSeparator)));
                    lastClass = column.text();
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        substitution.setPreviousTeacher(column.text());
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        // Drop a trailing " +" marker from the teacher name.
                        substitution.setTeacher(column.text().replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(column.text());
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(column.text());
                    String recognizedType = recognizeType(column.text());
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(column.text());
                }
            }

            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }

            day.addSubstitution(substitution);
        }
    }

    if (svp.select(".LehrerVerplant").size() > 0) {
        day.addMessage("<b>Verplante Lehrer:</b> " + svp.select(".LehrerVerplant").text());
    }

    if (svp.select(".Abwesenheiten").size() > 0) {
        day.addMessage("<b>Abwesenheiten:</b> " + svp.select(".Abwesenheiten").text());
    }

    // Messages come either as <p> siblings after a "Mitteilungen" heading or, failing
    // that, inside ".Mitteilungen" elements; each is split on double line breaks.
    Element h2 = svp.select("h2:contains(Mitteilungen)").first();
    if (h2 != null) {
        Element sibling = h2.nextElementSibling();
        while (sibling != null && sibling.tagName().equals("p")) {
            for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
            sibling = sibling.nextElementSibling();
        }
    } else if (svp.select(".Mitteilungen").size() > 0) {
        for (Element p : svp.select(".Mitteilungen")) {
            for (String nachricht : TextNode.createFromEncoded(p.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht)) {
                    day.addMessage(nachricht);
                }
            }
        }
    }

    v.addDay(day);
}
From source file:de.geeksfactory.opacclient.apis.Littera.java
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex) throws IOException, OpacErrorException, JSONException { final String searchUrl; if (!initialised) { start();//from ww w. j a va 2 s. c o m } try { searchUrl = buildSearchUrl(query, pageIndex); } catch (URISyntaxException e) { throw new RuntimeException(e); } final String html = httpGet(searchUrl, getDefaultEncoding()); final Document doc = Jsoup.parse(html); final Element navigation = doc.select(".result_view .navigation").first(); final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0; final Element ul = doc.select(".result_view ul.list").first(); final List<SearchResult> results = new ArrayList<>(); for (final Element li : ul.children()) { if (li.hasClass("zugangsmonat")) { continue; } final SearchResult result = new SearchResult(); final Element title = li.select(".titelinfo a").first(); result.setId(getQueryParamsFirst(title.attr("href")).get("id")); result.setInnerhtml(title.text() + "<br>" + title.parent().nextElementSibling().text()); result.setNr(results.size()); result.setPage(pageIndex); result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text())); result.setCover(getCover(li)); final String statusImg = li.select(".status img").attr("src"); result.setStatus(statusImg.contains("-yes") ? SearchResult.Status.GREEN : statusImg.contains("-no") ? SearchResult.Status.RED : null); results.add(result); } return new SearchRequestResult(results, totalResults, pageIndex); }
From source file:com.serphacker.serposcope.scraper.google.scraper.GoogleScraper.java
protected boolean isSiteLinkElement(Element element) { if (element == null) { return false; }// ww w. ja v a 2s .co m Elements parents = element.parents(); if (parents == null || parents.isEmpty()) { return false; } for (Element parent : parents) { if (parent.hasClass("mslg") || parent.hasClass("nrg") || parent.hasClass("nrgw")) { return true; } } return false; }