List of usage examples for org.jsoup.nodes.Element#select
public Elements select(String cssQuery)
From source file:lolthx.autohome.buy.AutohomePriceListFetch.java
@Override public void parse(String result, Task task) throws Exception { if (StringUtils.isBlank(result)) { return;/*w w w . jav a2 s.co m*/ } Date start = task.getStartDate(); Date end = task.getEndDate(); Document doc = Jsoup.parse(result); Elements lis = doc.select("li.price-item"); AutohomePriceInfoBean bean = new AutohomePriceInfoBean(); for (Element li : lis) { try { Elements postTimeEl = li.select("div.user-name span"); String postTime = ""; if (!postTimeEl.isEmpty()) { postTime = StringUtils.trim( StringUtils.substringBefore(postTimeEl.first().text(), "?").replaceAll("", "")); if (!isTime(postTime, start, end)) { continue; } } bean.setPostTime(postTime); bean.setUrl(task.getUrl()); bean.setForumId(StringUtils.substringBefore(task.getExtra(), ":")); bean.setProjectName(task.getProjectName()); bean.setKeyword(StringUtils.substringAfter(task.getExtra(), ":")); // post id Elements id = li.select("div.price-share a.share"); if (!id.isEmpty()) { String idStr = id.first().attr("data-target"); idStr = StringUtils.substringAfterLast(idStr, "_"); if (StringUtils.isBlank(idStr)) { continue; } bean.setId(idStr); } // Elements user = li.select("div.user-name a"); if (!user.isEmpty()) { String userUrl = user.first().absUrl("href"); String userId = StringUtils.substringAfterLast(userUrl, "/"); String userName = user.first().text(); bean.setUserId(userId); bean.setUserUrl(userUrl); bean.setUserName(userName); } Elements dataLis = li.select("div.price-item-bd li"); for (Element dataLi : dataLis) { String data = dataLi.text(); if (StringUtils.startsWith(data, "")) { bean.setCar(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setPrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setGuidePrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setTotalPrice(StringUtils.trim(StringUtils.substringAfter(data, 
""))); } if (StringUtils.startsWith(data, "")) { bean.setPurchaseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setCommercialInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setVehicleUseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setCompulsoryInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setLicenseFee(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setPromotion(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setBuyTime(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { String area = StringUtils.trim(StringUtils.substringAfter(data, "")); String[] pAndC = StringUtils.splitByWholeSeparator(area, ",", 2); if (pAndC.length == 1) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[0]); } if (pAndC.length == 2) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[1]); } } if (StringUtils.startsWith(data, "")) { Elements level = dataLi.select("span.level"); // if (!level.isEmpty()) { bean.setSellerComment(level.first().text()); } // ? Elements seller = dataLi.select("a.title"); if (!seller.isEmpty()) { String sellerUrl = seller.first().absUrl("href"); String sellerName = seller.first().text(); String sellerId = StringUtils.substringAfterLast(sellerUrl, "/"); bean.setSellerId(sellerId); bean.setSellerName(sellerName); bean.setSellerUrl(sellerUrl); } // ? Elements sellerPhone = dataLi.select("em.phone-num"); if (!sellerPhone.isEmpty()) { bean.setSellerPhone(sellerPhone.first().text()); } // ? 
// Elements sellerAddress = // dataLi.select("em.phone-num"); } if (StringUtils.startsWith(data, "?")) { bean.setBuyFeeling(StringUtils.trim(StringUtils.substringAfter(data, ""))); } } bean.saveOnNotExist(); } catch (Exception e) { e.printStackTrace(); continue; } } }
From source file:me.rkfg.xmpp.bot.plugins.CoolStoryPlugin.java
private String fetchStory(Website website) throws IOException { int roll = 0; String result;// w w w . j a v a 2 s . c o m int resultLength; int resultLines; //noinspection ConstantConditions do { roll++; final Document doc = Jsoup.connect(website.getUrlString()).userAgent(DEFAULT_UA).get(); doc.outputSettings(new Document.OutputSettings().prettyPrint(false)); logger.info("Fetched a story from {}", doc.location()); final Element story = doc.select(website.getCssQuery()).first(); if (story == null) { return ERROR_COULD_NOT_PARSE; } story.select("div").remove(); story.select("img").forEach(img -> img.replaceWith(new TextNode(img.attr("src"), ""))); story.select("br").after("\\n"); story.select("p").before("\\n\\n"); final String storyHtml = story.html().replaceAll("\\\\n", "\n"); result = Jsoup.clean(storyHtml, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)) .trim(); resultLength = result.length(); resultLines = countLines(result); } while (CONFIG_REROLL_LONG_STORIES && (resultLength > CONFIG_MAX_STORY_LENGTH || resultLines > CONFIG_MAX_STORY_LINES) && roll <= CONFIG_MAX_ROLLS); return result; }
From source file:me.vertretungsplan.parser.SVPlanParser.java
private void setDate(Element svp, Document doc, SubstitutionScheduleDay day) { String date = "Unbekanntes Datum"; if (svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").size() > 0) { date = svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").text() .replaceAll("Vertretungsplan (fr )?", "").trim(); } else if (doc.title().startsWith("Vertretungsplan fr ")) { date = doc.title().substring("Vertretungsplan fr ".length()); }// w w w . j a va2 s .c om date = date.replaceAll("\\s+", " "); day.setDateString(date); day.setDate(ParserUtils.parseDate(date)); if (svp.select(".svp-uploaddatum, .Stand").size() > 0) { String lastChange = svp.select(".svp-uploaddatum, .Stand").text().replace("Aktualisierung: ", "") .replace("Stand: ", ""); day.setLastChangeString(lastChange); day.setLastChange(ParserUtils.parseDateTime(lastChange)); } }
From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java
@RequestMapping("/uefa/teams") public List<Team> teams() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Team> result = new ArrayList<Team>(); String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html"; //On se connecte au site et on charge le document html Document doc;//from w w w .j a va 2 s .c o m try { doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("table"); for (Element element : elements) { Element tbody = element.getElementsByTag("tbody").first(); for (Element child : tbody.children()) { Element teamNameElement = child.getElementsByClass("tbl-teamname").first(); String name = teamNameElement.text(); String countryCode = child.getElementsByClass("tbl-countrycode").first().text(); String imgUrl = teamNameElement.select("img").first().absUrl("src"); Team team = new Team(); team.setName(name); team.setCountryCode(countryCode); team.setImgUrl(imgUrl); team.setNationalTeam(true); result.add(team); } } //String titre = element.text(); } catch (IOException e) { e.printStackTrace(); } // RestTemplate restTemplate = new RestTemplate(); // ResponseEntity<ChampionListDto> response = restTemplate.getForEntity( // uri, // ChampionListDto.class); // // List<ChampionDto> champions = response.getBody().getChampions(); // return champions.stream().map(c -> getChampionById(c.getId()).getName()).collect(Collectors.toList()); result.sort((t1, t2) -> t1.getName().compareTo(t2.getName())); if (pseudoCache == null) pseudoCache = result; return result; }
From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java
private List<List<String>> parseRows(Element root) { Element table = root.select("table.mon_list").first(); // each row has categories.size() categories, build a two dimensional array: // <row-index><category-index> = <value> // rows[0] is the name of the class, if multiple classes are set there, split them (separator: ,) List<List<String>> allRows = new ArrayList<List<String>>(); Elements rows = table.select("tr:gt(0)"); for (Element row : rows) { int i = 0; ArrayList<String> newrow = new ArrayList<String>(); String[] pendingClasses = null; // each category for (Element categ : row.select("td")) { if (i == 0) { // split class field by separator(,) if needed String text = categ.text(); pendingClasses = text.split(","); }//from w w w .j a v a 2 s . c o m // dont add class if multiple classes are given if (i != 0 || (pendingClasses == null || pendingClasses.length == 0)) newrow.add(categ.text()); // Log.i(LOG_TAG, categ.text()); i++; } // add row with category info to allRows array, if not multiple classes if (pendingClasses == null || pendingClasses.length == 0) allRows.add(newrow); // otherwise set class names to multiple rows else { for (String classN : pendingClasses) { ArrayList<String> n = (ArrayList<String>) newrow.clone(); n.add(0, classN.trim()); allRows.add(n); } } } return allRows; }
From source file:net.orzo.data.Web.java
/** * *//*from ww w .ja v a2 s. c o m*/ public List<Element> queryPage(Element root, String select, ScriptFunction fn) { MethodHandle mh; Element curr; List<Element> ans = null; // returns null in case fn is null try { if (fn != null) { for (Iterator<Element> iter = root.select(select).iterator(); iter.hasNext();) { curr = iter.next(); mh = fn.getBoundInvokeHandle(curr); mh.invoke(curr); } } else { ans = new ArrayList<>(); Iterators.addAll(ans, root.select(select).iterator()); } return ans; } catch (Throwable ex) { throw new RuntimeException(ex); } }
From source file:com.anhao.spring.service.impl.PhotosServiceImpl.java
private void getWallpaperTags(String wallpaperId) { String wallpaperUrl = "http://alpha.wallhaven.cc/wallpaper/" + wallpaperId; Document docDetails = getWallpaperHtmlDocument(wallpaperUrl); Elements Tags = docDetails.select("#tags li"); for (Element tag : Tags) { //iduuid ?wallhavenID String photosId = jobPhotosDAO.findByWallpaperId(wallpaperId); //tagUUID Element tagName = tag.select(".tagname").first(); String TagId = tagDAO.findByTagName(tagName.text()); System.out.println("wallpaperId:" + wallpaperId + "====tag name " + tagName.text()); PhotosTag photosTag = new PhotosTag(); photosTag.setPhotoId(photosId);/*from w w w . j av a2 s.c o m*/ photosTag.setTagId(TagId); photostagDAO.add(photosTag); } }
From source file:hello.Scraper.java
/**
 * Converts one scraped element into a {@link DumpEntry}.
 *
 * <p>The first 19 characters of the element's own text are parsed as a GMT
 * timestamp in {@code yyyy-MM-dd HH:mm:ss} form. The first {@code a} child, if
 * any, supplies the id (its own text) and reference ({@code href}); without a
 * link the id is {@code "private data"} and the reference is {@code null}.
 * The status is the own text of the first {@code span}.
 *
 * @param payload element describing one entry
 * @return the populated entry
 * @throws ParseException if the leading text is not a valid timestamp
 */
@Transformer(inputChannel = "channel3", outputChannel = "channel4")
public DumpEntry convert(Element payload) throws ParseException {
    String dateStr = payload.ownText().substring(0, 19);
    // "HH" is the 0-23 hour of day. The former "hh" is the 12-hour clock and,
    // with no AM/PM marker in the input, read "12:xx:xx" as 00:xx:xx.
    DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));
    Date timestamp = format.parse(dateStr);

    Elements list = payload.select("a");
    String id;
    String ref;
    if (list.size() > 0) {
        Element a = list.get(0);
        id = a.ownText();
        ref = a.attr("href");
    } else {
        id = "private data";
        ref = null;
    }

    Element span = payload.select("span").get(0);
    String status = span.ownText();
    return new DumpEntry(timestamp, id, ref, status);
}
From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java
/** * Parst eine "Nachrichten zum Tag"-Tabelle aus Untis-Vertretungsplnen * // w w w . ja va 2 s . c o m * @param table * das <code>table</code>-Element des HTML-Dokuments, das geparst * werden soll * @param data * Daten von der Schule (aus <code>Schule.getData()</code>) * @param tag * der {@link VertretungsplanTag} in dem die Nachrichten * gespeichert werden sollen */ protected void parseNachrichten(Element table, JSONObject data, VertretungsplanTag tag) { Elements zeilen = table.select("tr:not(:contains(Nachrichten zum Tag))"); for (Element i : zeilen) { Elements spalten = i.select("td"); String info = ""; for (Element b : spalten) { info += "\n" + TextNode.createFromEncoded(b.html(), null).getWholeText(); } info = info.substring(1); // remove first \n tag.getNachrichten().add(info); } }
From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java
private List<String> parseCategories(Element root) { // get table/*from w ww. j av a 2s.c om*/ //Log.d(LOG_TAG, root.toString()); Element table = root.select("table.mon_list").first(); // category headlines List<String> categories = new ArrayList<String>(); for (Element headline : table.select("tr:first-child th")) { categories.add(headline.text()); } return categories; }