List of usage examples for org.jsoup.select.Elements.first()
public Element first()
From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java
private Map<String, String> parseGeneralData(Element root, int dataType) { Map<String, String> generalData = new HashMap<String, String>(); // last update time and day Element updateTime = root.select("table.mon_head td:eq(2) p").first(); if (updateTime != null) { Pattern pat = Pattern.compile("(Stand: [\\.:0-9 ]+)", Pattern.DOTALL); Matcher matcher = pat.matcher(updateTime.text()); if (matcher.find()) generalData.put(Sync.GENERAL_DATA_UPDATETIME, matcher.group(1)); }//from w ww.j a va 2 s. co m // date the substitution table belongs to Element belongingDate = root.select("div.mon_title").first(); if (belongingDate != null) generalData.put(Sync.GENERAL_DATA_DATE, belongingDate.text()); // daily information Elements dailyInfos = root.select("table.info tr"); int i = 0; for (Element info : dailyInfos) { Elements e = info.select("td"); if (e.size() == 0) continue; String title = "", description = ""; for (TextNode node : e.first().textNodes()) title += node.text() + '\n'; title = title.trim(); // description only if available if (e.size() > 1) { for (TextNode node : e.get(1).textNodes()) description += node.text() + '\n'; description = title.trim(); } String keyTitle = "", keyDescription = ""; switch (i) { case 0: keyTitle = Sync.GENERAL_DATA_DAILYINFO_1_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_1_DESCRIPTION; break; case 1: keyTitle = Sync.GENERAL_DATA_DAILYINFO_2_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_2_DESCRIPTION; break; case 2: keyTitle = Sync.GENERAL_DATA_DAILYINFO_3_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_3_DESCRIPTION; break; default: break; } if (!keyTitle.equals("")) { generalData.put(keyTitle, title); generalData.put(keyDescription, description); } i++; } generalData.put(Sync.GENERAL_DATA_DATATYPE, String.valueOf(dataType)); return generalData; }
From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java
/**
 * Copies the "wanted", "watched" and "owned" counters from the page into {@code video}.
 *
 * <p>Each counter is best-effort: when its link is missing or its text is not an
 * integer, the corresponding setter is simply not called.
 *
 * @param doc   parsed page containing {@code div#video_favorite_edit}
 * @param video target object that receives the counters
 */
private void setVideoCount(Document doc, Video video) {
    Elements countElements = doc.select("div#video_favorite_edit span");
    if (!CollectionUtils.isNotEmpty(countElements)) {
        return;
    }

    Integer wanted = parseCount(countElements, "#subscribed a");
    if (wanted != null) {
        video.setCountWanted(wanted);
    }

    Integer watched = parseCount(countElements, "#watched a");
    if (watched != null) {
        video.setCountWatched(watched);
    }

    Integer owned = parseCount(countElements, "#owned a");
    if (owned != null) {
        video.setCountOwned(owned);
    }
}

/**
 * Returns the integer text of the first element matching {@code selector} within
 * {@code scope}, or {@code null} when nothing matches or the text is not an integer.
 */
private Integer parseCount(Elements scope, String selector) {
    Elements matches = scope.select(selector);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return null;
    }
    try {
        return Integer.valueOf(matches.first().text());
    } catch (NumberFormatException ignored) {
        // counter text was not numeric; the field is left unset on purpose
        return null;
    }
}
From source file:org.confab.PhpBB3Parser.java
/** * Parses each post for a particular topic. * @param html Html containing the posts to be parsed * @return List of Post objects *///w w w .java2s .c o m public List<Post> parsePosts(Document html, ForumThread parent) { Utilities.debug("Starting parsePosts"); List<Post> ret = new ArrayList<Post>(); // Each post should have it's own table Elements div_posts = html.select("div#posts"); assert !div_posts.isEmpty(); Elements posts_table = div_posts.select("table[id~=(post\\d+)]"); assert !posts_table.isEmpty(); for (Element el_post : posts_table) { Post new_post = new Post(parent); // Get post id (id=post\d+) new_post.id = el_post.attr("id").replace("post", "").trim(); assert new_post.id != null; // Get post message Elements el_message = el_post.select("div[id~=(post_message_\\d+)]"); assert !el_message.isEmpty(); new_post.message = el_message.first().text(); assert new_post.message != null; Utilities.debug("new_post.message: " + new_post.message); // Get post author Elements el_author = el_post.select(".bigusername"); assert !el_author.isEmpty(); new_post.author.username = el_author.first().text(); assert new_post.author != null; Utilities.debug("new_post.author: " + new_post.author); ret.add(new_post); } Utilities.debug("Finished parsePosts"); return ret; }
From source file:prince.app.ccm.tools.Task.java
public String getFormParams(String html, String username, String password) throws UnsupportedEncodingException { System.out.println("Extracting form's data..."); Document doc = Jsoup.parse(html); // Google form id Element loginform = doc.getElementById("contenido_right"); Elements loginaction = doc.getElementsByTag("form"); Element form = loginaction.first(); log = MAIN_PAGE + form.attr("action"); Log.e(TAG, "Action: " + log); Elements inputElements = loginform.getElementsByTag("input"); List<String> paramList = new ArrayList<String>(); for (Element inputElement : inputElements) { String key = inputElement.attr("name"); String value = inputElement.attr("value"); if (key.equals("usuario")) { value = username;//from w w w. j a va 2 s . c o m paramList.add(key + "=" + URLEncoder.encode(value, "UTF-8")); } else if (key.equals("contrasena")) { value = password; paramList.add(key + "=" + URLEncoder.encode(value, "UTF-8")); } } // build parameters list StringBuilder result = new StringBuilder(); for (String param : paramList) { if (result.length() == 0) { result.append(param); } else { result.append("&" + param); } } Log.d(TAG, "Done in getFormParams: " + result.toString()); return result.toString(); }
From source file:com.github.binlee1990.transformers.spider.PersonCrawler.java
/**
 * Stores the video described by a crawled detail page, together with its
 * actress and category links.
 *
 * <p>Pages whose URL does not start with the expected detail-page prefix, pages
 * without HTML parse data, pages without an identification code, and videos that
 * {@code videoMapper.queryByVideo} already finds are skipped. Optional fields
 * (date, duration, counters, score, ...) are best-effort: a missing or unparsable
 * value leaves the field unset.
 *
 * @param page crawled page handed in by the crawler
 */
@Override
public void visit(Page page) {
    String url = page.getWebURL().getURL();
    logger.info(url);
    if (!url.startsWith("http://www.javlibrary.com/cn/?v=jav")) {
        return;
    }
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return;
    }

    HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
    Document doc = Jsoup.parse(htmlParseData.getHtml());

    String videoIdentificationCode = firstTextOrNull(doc.select("div#video_id td.text"));
    if (videoIdentificationCode == null) {
        // without the code the duplicate check below cannot work, so skip the page
        logger.info("no identification code found, skipping " + url);
        return;
    }

    Video queryVideo = new Video();
    queryVideo.setIdentificationCode(videoIdentificationCode);
    if (null != videoMapper.queryByVideo(queryVideo)) {
        return;
    }

    Video video = new Video();
    video.setUrl(url);
    Date now = new Date();
    video.setCreateTime(now);
    video.setUpdateTime(now);
    video.setIdentificationCode(videoIdentificationCode);

    String title = firstTextOrNull(doc.select("div#video_title a"));
    if (title != null) {
        video.setTitle(title);
    }

    String releaseDate = firstTextOrNull(doc.select("div#video_date td.text"));
    if (releaseDate != null) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            video.setReleaseDate(sdf.parse(releaseDate));
        } catch (ParseException ignored) {
            // unparsable release date: leave the field unset
        }
    }

    Integer durationMinutes = parseIntOrNull(firstTextOrNull(doc.select("div#video_length span.text")));
    if (durationMinutes != null) {
        video.setDurationMinutes(durationMinutes);
    }

    String director = firstTextOrNull(doc.select("div#video_director td.text"));
    if (director != null) {
        video.setDirector(director);
    }

    String producer = firstTextOrNull(doc.select("div#video_maker td.text"));
    if (producer != null) {
        video.setProducer(producer);
    }

    String distributor = firstTextOrNull(doc.select("div#video_label td.text"));
    if (distributor != null) {
        video.setDistributor(distributor);
    }

    Elements countElements = doc.select("div#video_favorite_edit span");
    Integer countWanted = parseIntOrNull(firstTextOrNull(countElements.select("#subscribed a")));
    if (countWanted != null) {
        video.setCountWanted(countWanted);
    }
    Integer countWatched = parseIntOrNull(firstTextOrNull(countElements.select("#watched a")));
    if (countWatched != null) {
        video.setCountWatched(countWatched);
    }
    Integer countOwned = parseIntOrNull(firstTextOrNull(countElements.select("#owned a")));
    if (countOwned != null) {
        video.setCountOwned(countOwned);
    }

    String score = firstTextOrNull(doc.select("div#video_review td.text span.score"));
    if (score != null) {
        // the score is rendered in parentheses
        score = StringUtils.replace(score, "(", "");
        score = StringUtils.replace(score, ")", "");
        if (StringUtils.isNotBlank(score)) {
            try {
                video.setScore(Float.valueOf(score));
            } catch (NumberFormatException ignored) {
                // non-numeric score: leave the field unset
            }
        }
    }

    Elements actressElements = doc.select("div#video_cast span.star");
    if (CollectionUtils.isNotEmpty(actressElements)) {
        video.setSingleFemaleFlag(actressElements.size() <= 1);
    }

    videoMapper.insertSelective(video);
    int videoId = videoMapper.queryByVideo(video).getId();
    logger.info("handle " + videoId + "\n" + JSON.toJSONString(video));

    actressElements.forEach(a -> {
        String aName = a.text().trim();
        if (!StringUtils.isNotBlank(aName)) {
            return;
        }
        Actress queryActress = new Actress();
        queryActress.setName(aName);
        Actress actress = actressMapper.queryByActress(queryActress);
        if (null == actress) {
            actress = new Actress();
            actress.setName(aName);
            actressMapper.insertSelective(actress);
            // NOTE(review): the code below is read from the object passed to
            // insertSelective, as before; confirm the mapper populates it on insert.
        }
        VideoActress va = new VideoActress();
        va.setActressCode(actress.getCode());
        va.setVideoId(videoId);
        videoActressMapper.insertSelective(va);
    });

    Elements categoryElements = doc.select("div#video_genres span.genre");
    categoryElements.forEach(c -> {
        String cDescription = c.text().trim();
        if (!StringUtils.isNotBlank(cDescription)) {
            return;
        }
        Category queryCategory = new Category();
        queryCategory.setSubtype(cDescription);
        Category category = categoryMapper.queryByCategory(queryCategory);

        VideoCategory vc = new VideoCategory();
        if (null != category) {
            vc.setCategoryId(category.getId());
        } else {
            category = new Category();
            category.setSubtype(cDescription);
            categoryMapper.insertSelective(category);
            // the id of a new category is read back from the mapper
            int categoryId = categoryMapper.queryByCategory(category).getId();
            vc.setCategoryId(categoryId);
        }
        vc.setCategoryDescription(category.getSubtype());
        vc.setVideoId(videoId);
        videoCategoryMapper.insertSelective(vc);
    });
}

/** Returns the text of the first element in {@code matches}, or {@code null} when it is empty. */
private String firstTextOrNull(Elements matches) {
    return CollectionUtils.isNotEmpty(matches) ? matches.first().text() : null;
}

/** Parses {@code text} as an integer; returns {@code null} for {@code null} or non-numeric input. */
private Integer parseIntOrNull(String text) {
    if (text == null) {
        return null;
    }
    try {
        return Integer.valueOf(text);
    } catch (NumberFormatException ignored) {
        // the page did not contain a plain integer here
        return null;
    }
}
From source file:org.confab.VBulletinParser.java
public List<Forum> parseForums(Document root, BulletinBoard parent) { Utilities.debug("parseForums"); List<Forum> ret = new ArrayList<Forum>(); // get table/* w ww .j a v a2 s. c o m*/ Elements forum_table = root.select("tbody[id*=collapseobj_forumbit_] tr"); assert !forum_table.isEmpty(); for (Element el_tr : forum_table) { Forum new_forum = new Forum(parent); // Get the table data for this row Elements el_tds = el_tr.select("td"); assert !el_tds.isEmpty() : el_tr.html(); // xbox360achievements has a lot of subforums and puts these in their own table // The <a>'s are picked up as children of the parent <td> so don't parse this sub- // tables row's seperatly if (!el_tds.select("td.thead").isEmpty() || el_tds.size() < 3) { //Utilities.debug("tr doesn't seem to have anything we want, skipping."); continue; } // Get the title URL Elements els_a = el_tds.get(1).select("a"); assert !els_a.isEmpty() : el_tds.html(); new_forum.url = els_a.first().attr("href"); assert new_forum.url != null; Utilities.debug("new_forum.url : " + new_forum.url); // Get the title text assert els_a.first() != null; new_forum.title = els_a.first().text(); assert new_forum.title != null; Utilities.debug("new_forum.title : " + new_forum.title); // Check for any subforums in remaining a elements els_a.remove(els_a.first()); for (Element el_a : els_a) { Forum sub_forum = new Forum(parent); sub_forum.url = el_a.attr("href"); assert sub_forum.url != null; sub_forum.title = el_a.text(); assert sub_forum.title != null; new_forum.subForums.add(sub_forum); Utilities.debug("added subForum: " + sub_forum.title); } // Get num viewing the current forum Element el_viewing = el_tr.select(":matchesOwn((\\d+ Viewing))").first(); if (el_viewing != null) { new_forum.numViewing = el_viewing.text(); } else { new_forum.numViewing = "0"; } Utilities.debug("new_forum.numViewing : " + new_forum.numViewing); // Get the description/message of this topic Element el_description = 
el_tds.get(1).select("div.smallfont").first(); if (el_description != null) { new_forum.description = el_description.text(); } else { new_forum.description = ""; } Utilities.debug("new_forum.description : " + new_forum.description); Utilities.debug("new_forum.parent.url : " + new_forum.parent.url); ret.add(new_forum); Utilities.debug("-----"); } Utilities.debug("end parseForums"); return ret; }
From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java
@Override public List<RawBankAccount> getBankAccounts(String authTokenString) throws IOException, ParseException, AuthenticationException { AuthToken authToken = AuthToken.fromJson(authTokenString); String response = loadPageWithAuth(getHttpClient(), authToken, LIST_ACCOUNTS_XML_ID); Document doc = Jsoup.parse(response, BASE_URL); Element content = doc.getElementById("main"); if (content == null) { throw new ParseException("getBankAccounts: can't find #main"); }//from www . java2 s.com Elements tables = content.select("section.result table.data"); if (tables == null || tables.size() == 0) { throw new ParseException("getBankAccounts: can't find table section.result table.data"); } Elements rows = tables.first().getElementsByTag("tr"); if (rows == null || rows.size() == 0) { throw new ParseException("getBankAccounts: first table is empty"); } ArrayList<RawBankAccount> bankAccounts = new ArrayList<RawBankAccount>(rows.size()); String type = "undef"; for (Element row : rows) { if (row.getElementsByTag("th").size() > 0) { // header row type = row.child(0).text(); } else { RawBankAccount bankAccount = obtainBankAccountFromHtmlTableRow(type, row); if (bankAccount != null) { bankAccounts.add(bankAccount); } } } return bankAccounts; }
From source file:org.jresponder.message.MessageRefImpl.java
/** * Render a message in the context of a particular subscriber * and subscription./* www . j a va 2s . com*/ */ @Override public boolean populateMessage(MimeMessage aMimeMessage, SendConfig aSendConfig, Subscriber aSubscriber, Subscription aSubscription) { try { // prepare context Map<String, Object> myRenderContext = new HashMap<String, Object>(); myRenderContext.put("subscriber", aSubscriber); myRenderContext.put("subscription", aSubscription); myRenderContext.put("config", aSendConfig); myRenderContext.put("message", this); // render the whole file String myRenderedFileContents = TextRenderUtil.getInstance().render(fileContents, myRenderContext); // now parse again with Jsoup Document myDocument = Jsoup.parse(myRenderedFileContents); String myHtmlBody = ""; String myTextBody = ""; // html body Elements myBodyElements = myDocument.select("#htmlbody"); if (!myBodyElements.isEmpty()) { myHtmlBody = myBodyElements.html(); } // text body Elements myJrTextBodyElements = myDocument.select("#textbody"); if (!myJrTextBodyElements.isEmpty()) { myTextBody = TextUtil.getInstance().getWholeText(myJrTextBodyElements.first()); } // now build the actual message MimeMessage myMimeMessage = aMimeMessage; // wrap it in a MimeMessageHelper - since some things are easier with that MimeMessageHelper myMimeMessageHelper = new MimeMessageHelper(myMimeMessage); // set headers // subject myMimeMessageHelper.setSubject(TextRenderUtil.getInstance() .render((String) propMap.get(MessageRefProp.JR_SUBJECT.toString()), myRenderContext)); // TODO: implement DKIM, figure out subetha String mySenderEmailPattern = aSendConfig.getSenderEmailPattern(); String mySenderEmail = TextRenderUtil.getInstance().render(mySenderEmailPattern, myRenderContext); myMimeMessage.setSender(new InternetAddress(mySenderEmail)); myMimeMessageHelper.setTo(aSubscriber.getEmail()); // from myMimeMessageHelper.setFrom( TextRenderUtil.getInstance() .render((String) propMap.get(MessageRefProp.JR_FROM_EMAIL.toString()), 
myRenderContext), TextRenderUtil.getInstance() .render((String) propMap.get(MessageRefProp.JR_FROM_NAME.toString()), myRenderContext)); // see how to set body // if we have both text and html, then do multipart if (myTextBody.trim().length() > 0 && myHtmlBody.trim().length() > 0) { // create wrapper multipart/alternative part MimeMultipart ma = new MimeMultipart("alternative"); myMimeMessage.setContent(ma); // create the plain text BodyPart plainText = new MimeBodyPart(); plainText.setText(myTextBody); ma.addBodyPart(plainText); // create the html part BodyPart html = new MimeBodyPart(); html.setContent(myHtmlBody, "text/html"); ma.addBodyPart(html); } // if only HTML, then just use that else if (myHtmlBody.trim().length() > 0) { myMimeMessageHelper.setText(myHtmlBody, true); } // if only text, then just use that else if (myTextBody.trim().length() > 0) { myMimeMessageHelper.setText(myTextBody, false); } // if neither text nor HTML, then the message is being skipped, // so we just return null else { return false; } return true; } catch (MessagingException e) { throw new RuntimeException(e); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } }
From source file:tkbautobooking.BookingSystem.java
/**
 * Reads the hidden access token from the login page markup.
 *
 * <p>Exactly one element with {@code name="access_token"} must exist and it must
 * carry a {@code value} attribute; that value is stored in {@code login_hidden_token}.
 *
 * @throws Exception when the token element is missing, ambiguous, or has no value
 */
private void praseLoginPage() throws Exception {
    final Elements tokenInputs =
            Jsoup.parse(LoginPageHTML).getElementsByAttributeValue("name", "access_token");

    final boolean exactlyOne = tokenInputs.size() == 1;
    if (!exactlyOne || !tokenInputs.first().hasAttr("value")) {
        throw new Exception("Prase Login Page fail !");
    }

    login_hidden_token = tokenInputs.first().attr("value");
}
From source file:net.devietti.ArchConfMapServlet.java
/** Fetch info for a list of conferences from WikiCFP */ private List<Conf> getConfInfo(List<String> confs) throws IOException { String query = StringUtils.join(confs, "+"); List<Conf> results = new LinkedList<Conf>(); /*//from ww w .jav a 2s . c o m * NB: year=f returns hits for this year and future years. This is exactly what we want, since * we automatically discard conferences that have already happened. */ Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query); Elements rows = doc.select("div[class=contsec] table table tr"); for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) { final Element firstRow = iter.next(); final Elements confName = firstRow.select("td a"); if (confName.isEmpty()) continue; final Conf conf = new Conf(); // make sure we match one of the conferences we're interested in String cn = confName.first().text().split(" ")[0]; int found = Arrays.binarySearch(CONFERENCE_NAMES, cn); if (found < 0) continue; // not found final String confFullName = firstRow.select("td").get(1).text(); // don't match other ICS conferences, eg Information, Communication, Society if (CONFERENCE_NAMES[found].equals("ICS")) { if (!confFullName.toLowerCase().contains("supercomputing")) { continue; } } // don't match other CC conferences, eg Creative Construction if (CONFERENCE_NAMES[found].equals("CC")) { if (!confFullName.toLowerCase().contains("compiler")) { continue; } } conf.name = confName.first().text(); /* * we found a hit! The conference information is split across two <tr> table elements. * Conference name and link to cfp are in the first <tr>, and dates, location and deadline * in the second. 
*/ final Element secondRow = iter.next(); String dates = secondRow.select("td").first().text(); String startDate = dates.substring(0, dates.indexOf('-')).trim(); conf.start = cfpDateFormat.parseDateTime(startDate); conf.end = cfpDateFormat.parseDateTime(dates.substring(dates.indexOf('-') + 1).trim()); conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end); if (conf.start.year().equals(conf.end.year()) && conf.start.monthOfYear().equals(conf.end.monthOfYear())) { conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-" + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start); } String deadline = secondRow.select("td").get(2).text().trim(); if (deadline.contains("(")) { // abstract deadline may be in parentheses deadline = deadline.substring(0, deadline.indexOf('(')).trim(); } conf.deadline = cfpDateFormat.parseDateTime(deadline); conf.url = "http://www.wikicfp.com" + confName.attr("href"); /* * extract the WikiCFP eventid from the link, so that, later on, the client can pull the * cfp page and get the direct conference site link. */ com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url); String[] eid = url.getParameters("eventid"); if (0 == eid.length) continue; try { conf.eventid = Integer.valueOf(eid[0]); } catch (NumberFormatException e) { error("invalid event id " + eid); continue; } conf.location = secondRow.select("td").get(1).text(); results.add(conf); } return results; }