List of usage examples for org.jsoup.nodes Comment getData
public String getData()
From source file:org.brnvrn.Main.java
/**
 * Inspects the text of an HTML comment and, when it is recognised, copies the first
 * match of the class-level pattern {@code p} into the given tool.
 *
 * <p>A comment containing {@code "erified"} feeds {@code setVerified}; otherwise a comment
 * containing {@code "pdate"} feeds {@code setLast_update}. Any other comment is only
 * reported on standard output.
 *
 * @param tool    receiver of the extracted value
 * @param comment comment node whose data is examined
 * @return {@code false} if the comment carries no data, {@code true} otherwise
 */
private static boolean parseComment(Tool tool, Comment comment) {
    final String text = comment.getData();
    if (text == null) {
        return false;
    }

    final boolean isVerifiedNote = text.contains("erified");
    if (!isVerifiedNote && !text.contains("pdate")) {
        System.out.println(" # Unknown comment " + text);
        return true;
    }

    // The pattern is only consulted for recognised comments.
    final Matcher valueMatcher = p.matcher(text);
    if (valueMatcher.find()) {
        final String value = valueMatcher.group(0);
        if (isVerifiedNote) {
            tool.setVerified(value);
        } else {
            tool.setLast_update(value);
        }
    }
    return true;
}
From source file:me.vertretungsplan.parser.UntisCommonParser.java
static String findLastChange(Element doc, SubstitutionScheduleData scheduleData) { String lastChange = null;/*from w w w. j av a 2 s.com*/ boolean lastChangeLeft = false; if (scheduleData != null) { if (scheduleData.getData().has("stand_links")) { // backwards compatibility lastChangeLeft = scheduleData.getData().optBoolean("stand_links", false); } else { lastChangeLeft = scheduleData.getData().optBoolean(PARAM_LAST_CHANGE_LEFT, false); } } if (doc.select("table.mon_head").size() > 0) { Element monHead = doc.select("table.mon_head").first(); lastChange = findLastChangeFromMonHeadTable(monHead); } else if (lastChangeLeft) { final String bodyHtml = doc.select("body").size() > 0 ? doc.select("body").html() : doc.html(); lastChange = bodyHtml.substring(0, bodyHtml.indexOf("<p>") - 1); } else { List<Node> childNodes; if (doc instanceof Document) { childNodes = ((Document) doc).body().childNodes(); } else { childNodes = doc.childNodes(); } for (Node node : childNodes) { if (node instanceof Comment) { Comment comment = (Comment) node; if (comment.getData().contains("<table class=\"mon_head\">")) { Document commentedDoc = Jsoup.parse(comment.getData()); Element monHead = commentedDoc.select("table.mon_head").first(); lastChange = findLastChangeFromMonHeadTable(monHead); break; } } } } return lastChange; }
From source file:model.ParseInfoFromSite.java
/**
 * Parse info about a bank from NBU site and return it.
 *
 * <p>The page is fetched, every comment returned by {@code findAllComments} is replaced by
 * its own content (so commented-out markup becomes part of the document), and the elements
 * with class {@code "cell"} are then read in pairs: the text of one cell is the key and the
 * text of the following cell is the value.
 *
 * @param URL
 *            on page with information about one bank
 * @return Map <String,String> with information about bank; if an {@link IOException}
 *         occurs, its stack trace is printed and the map built so far is returned
 */
public Map<String, String> getBankInfoMap(String URL) {
    logger.info("run");
    Map<String, String> map = new HashMap<String, String>();
    try {
        Document doc = Jsoup.connect(URL).userAgent("Mozilla").timeout(10 * 1000).get();
        for (Comment comment : findAllComments(doc)) {
            // Insert the comment's text as markup right after it, then drop the comment.
            comment.after(comment.getData());
            comment.remove();
        }
        Elements cells = doc.getElementsByClass("cell");
        // Bound on i + 1 so a trailing cell without a partner is skipped instead of
        // triggering an IndexOutOfBoundsException.
        for (int i = 0; i + 1 < cells.size(); i += 2) {
            map.put(cells.get(i).text(), cells.get(i + 1).text());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return map;
}
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
protected SearchRequestResult parse_search(String html, int page) { Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url);/*from w ww. j ava 2s. c o m*/ Elements table = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer"); List<SearchResult> results = new ArrayList<>(); for (int i = 0; i < table.size(); i++) { Element tr = table.get(i); SearchResult sr = new SearchResult(); int contentindex = 1; if (tr.select("td a img").size() > 0) { String[] fparts = tr.select("td a img").get(0).attr("src").split("/"); String fname = fparts[fparts.length - 1]; if (data.has("mediatypes")) { try { sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname))); } catch (JSONException | IllegalArgumentException e) { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } else { sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "") .replace(".gif", "").replace(".png", ""))); } } else { if (tr.children().size() == 3) { contentindex = 2; } } sr.setInnerhtml(tr.child(contentindex).child(0).html()); sr.setNr(i); Element link = tr.child(contentindex).select("a").first(); try { if (link != null && link.attr("href").contains("detmediennr")) { Map<String, String> params = getQueryParamsFirst(link.attr("abs:href")); String nr = params.get("detmediennr"); if (Integer.parseInt(nr) > i + 1) { // Seems to be an ID if (params.get("detDB") != null) { sr.setId("&detmediennr=" + nr + "&detDB=" + params.get("detDB")); } else { sr.setId("&detmediennr=" + nr); } } } } catch (Exception e) { } try { if (tr.child(1).childNode(0) instanceof Comment) { Comment c = (Comment) tr.child(1).childNode(0); String comment = c.getData().trim(); String id = comment.split(": ")[1]; sr.setId(id); } } catch (Exception e) { e.printStackTrace(); } results.add(sr); } int results_total = -1; if (doc.select(".result_gefunden").size() > 0) { try { results_total = Integer.parseInt( 
doc.select(".result_gefunden").text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1")); } catch (NumberFormatException e) { e.printStackTrace(); results_total = -1; } } return new SearchRequestResult(results, results_total, page); }