List of usage examples for org.jsoup.nodes Document getElementsByClass
public Elements getElementsByClass(String className)
From source file:ie.nuim.cs.dri.metadata.WebSearch.java
/** * * @param xmlString/* w w w . j av a 2 s. c om*/ * @param title * @return * @throws Exception */ public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception { //System.out.println("CiteSeer search returned:\n"+xmlString); Document doc = Jsoup.parse(xmlString); ROS ros = new ROS(); String articleTitle = ""; int pubYear = 0; Elements resultElements = doc.getElementsByClass("result"); for (Element result : resultElements) { Elements titleElement = result.getElementsByClass("doc_details"); System.out.println(titleElement.text() + "\t" + title); if (!titleElement.text().equalsIgnoreCase(title)) { break; } else { ros.setArticleTitle(title); Elements authorElement = result.getElementsByClass("pubinfo"); //authors=authorElement.text(); Elements yearElement = result.getElementsByClass("pubyear"); String yearStr = yearElement.text().replace(", ", ""); int year = (yearStr.length() > 0 ? Integer.parseInt(yearStr) : 0); System.out.println("year:" + yearElement.text().replace(", ", "")); ros.setYear(year); Elements citeElement = result.getElementsByClass("citation"); String[] citedBy = citeElement.text().split(" "); int citeby = (citedBy.length > 2 ? Integer.parseInt(citedBy[2]) : -1); ros.setCitedByCount(citeby); Elements publicationElement = result.getElementsByClass("pubvenue"); String pub = publicationElement.text().replace("- ", "").toLowerCase(); if (pub.contains("journal")) { ros.setPublicationType("Journal"); } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) { ros.setPublicationType("Conference"); } else { ros.setPublicationType(""); } ros.setPublicationName(pub); } } // } return ros; }
From source file:com.cognifide.aet.job.common.datafilters.extractelement.ExtractElementDataModifier.java
private String modifyDataForClassParam(Document document) throws ProcessingException { String result;//from w ww.java2 s. c o m Elements elements = document.getElementsByClass(elementClass); if (!elements.isEmpty()) { result = elements.outerHtml(); } else { throw new ProcessingException("No element with class=" + elementClass + " found!"); } return result; }
From source file:fr.arlefebvre.pronostics.controller.EuroMatchListController.java
@RequestMapping("/euro2016/matches") public List<Match> matches() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Match> result = new ArrayList<Match>(); String uri = "http://www.lequipe.fr/Football/Euro/Saison-2016/calendrier-resultats.html"; //On se connecte au site et on charge le document html Document doc; try {//from w ww .j a va2 s . co m doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("mainDate"); for (Element element : elements) { Element title = element.getElementsByClass("title").first(); String date = title.text(); Element tbody = element.getElementsByTag("tbody").first(); for (Element matchElement : tbody.children()) { String groupe = matchElement.getElementsByClass("date").first().text(); String home = matchElement.getElementsByClass("domicile").first().text(); String away = matchElement.getElementsByClass("exterieur").first().text(); Match m = new Match(); m.setDate(date); m.setHomeTeamId(home); m.setAwayTeamId(away); m.setGroup(groupe); result.add(m); } } } catch (IOException e) { e.printStackTrace(); } if (pseudoCache == null) pseudoCache = result; return result; }
From source file:com.abixen.platform.core.service.impl.LayoutServiceImpl.java
/**
 * Converts an HTML layout into a JSON string of the form {@code {"rows":[...]}}.
 *
 * <p>Every element with class {@code row} becomes one {@code LayoutRowUtil}; every element
 * with class {@code column} inside it becomes one {@code LayoutColumnUtil} built from the
 * part of the column's {@code class} attribute that follows the first space (the whole
 * attribute when it contains no space).
 *
 * @param htmlString layout markup
 * @return JSON representation of the rows and their columns
 */
@Override
public String htmlLayoutToJson(String htmlString) {
    log.debug("htmlLayoutToJson() - htmlString: " + htmlString);

    Document doc = Jsoup.parse(htmlString);
    List<LayoutRowUtil> rowUtilList = new ArrayList<>();

    for (Element row : doc.getElementsByClass("row")) {
        List<LayoutColumnUtil> columnUtilList = new ArrayList<>();
        // Query the row element directly; serialising it and parsing the text again into
        // a new document is unnecessary work and can alter context-dependent markup.
        for (Element column : row.getElementsByClass("column")) {
            String styleClass = column.attr("class");
            columnUtilList.add(new LayoutColumnUtil(styleClass.substring(styleClass.indexOf(" ") + 1)));
        }
        rowUtilList.add(new LayoutRowUtil(columnUtilList));
    }

    return "{\"rows\":" + new Gson().toJson(rowUtilList) + "}";
}
From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java
@RequestMapping("/uefa/teams") public List<Team> teams() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Team> result = new ArrayList<Team>(); String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html"; //On se connecte au site et on charge le document html Document doc; try {//from w ww.j av a 2 s . c o m doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("table"); for (Element element : elements) { Element tbody = element.getElementsByTag("tbody").first(); for (Element child : tbody.children()) { Element teamNameElement = child.getElementsByClass("tbl-teamname").first(); String name = teamNameElement.text(); String countryCode = child.getElementsByClass("tbl-countrycode").first().text(); String imgUrl = teamNameElement.select("img").first().absUrl("src"); Team team = new Team(); team.setName(name); team.setCountryCode(countryCode); team.setImgUrl(imgUrl); team.setNationalTeam(true); result.add(team); } } //String titre = element.text(); } catch (IOException e) { e.printStackTrace(); } // RestTemplate restTemplate = new RestTemplate(); // ResponseEntity<ChampionListDto> response = restTemplate.getForEntity( // uri, // ChampionListDto.class); // // List<ChampionDto> champions = response.getBody().getChampions(); // return champions.stream().map(c -> getChampionById(c.getId()).getName()).collect(Collectors.toList()); result.sort((t1, t2) -> t1.getName().compareTo(t2.getName())); if (pseudoCache == null) pseudoCache = result; return result; }
From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java
private ImmutableMap<String, String> druckSachenProperties(Document htmlDoc) { ImmutableMap.Builder<String, String> mapBuilder = ImmutableMap.builder(); Elements keyElements = htmlDoc.getElementsByClass("kb1"); // td elements for (Element element : keyElements) { String key = removeNonBreakingSpacesAndTrim(element.text()); if (key.endsWith(":")) { key = key.substring(0, key.length() - 1); }/*from www. j av a 2 s . c o m*/ if (element.nextElementSibling() != null && !element.nextElementSibling().hasAttr("kb1")) { String value = removeNonBreakingSpacesAndTrim(element.nextElementSibling().text()); if ((!key.isEmpty()) && (!value.isEmpty())) { mapBuilder.put(key, value); } } } return mapBuilder.build(); }
From source file:eu.masconsult.bgbanking.banks.dskbank.DskClient.java
/**
 * Tells whether the page shows a "Log Out" entry inside a {@code supplemental_links} element.
 *
 * @param doc parsed page
 * @return {@code true} if any {@code supplemental_links} element contains the text "Log Out"
 * @throws ParseException if the page has no {@code supplemental_links} element at all
 */
private boolean checkLoggedIn(Document doc) {
    Elements linkBlocks = doc.getElementsByClass("supplemental_links");
    if (linkBlocks == null || linkBlocks.isEmpty()) {
        throw new ParseException("getBankAccounts: can't find .supplemental_links");
    }

    for (Element linkBlock : linkBlocks) {
        Elements logoutHits = linkBlock.getElementsContainingText("Log Out");
        if (logoutHits == null || logoutHits.isEmpty()) {
            continue;
        }
        return true;
    }
    return false;
}
From source file:es.logongas.util.seguridad.AuthenticationProviderImplMoodle.java
/**
 * Authenticates a login/password credential.
 *
 * <p>The configured admin login is checked locally against {@code passwordAdmin}; every
 * other login is verified by posting the credentials to the Moodle login page. On success
 * the matching {@code Identity} is read by its natural key (the login).
 *
 * @param credential credential to verify; anything other than a
 *                   {@code CredentialImplLoginPassword} is rejected
 * @return the identity for the login, or {@code null} when the credential is of another
 *         type, the check fails, or a {@code BusinessException} occurs
 * @throws RuntimeException wrapping any other exception raised during the check
 */
@Override
public Principal authenticate(Credential credential) {
    try {
        if (!(credential instanceof CredentialImplLoginPassword)) {
            return null;
        }
        CredentialImplLoginPassword credentialImplLoginPassword = (CredentialImplLoginPassword) credential;

        if (loginAdmin.equalsIgnoreCase(credentialImplLoginPassword.getLogin())) {
            StrongPasswordEncryptor passwordEncryptor = new StrongPasswordEncryptor();
            if (!passwordEncryptor.checkPassword(credentialImplLoginPassword.getPassword(), passwordAdmin)) {
                return null;
            }
        } else if (!isAcceptedByMoodle(credentialImplLoginPassword)) {
            return null;
        }

        GenericDAO<Identity, Integer> genericDAO = daoFactory.getDAO(Identity.class);
        Identity identity = genericDAO.readByNaturalKey(credentialImplLoginPassword.getLogin());
        return identity;
    } catch (BusinessException ex) {
        return null;
    } catch (Exception ex) {
        log.info("Fallo al conectarse al moodle", ex);
        throw new RuntimeException(ex);
    }
}

/**
 * Posts the credentials to the Moodle login page and reports whether the response looks
 * like a logged-in page: a {@code logininfo} element whose last link points to "logout".
 * When it does, that logout link is requested before returning.
 */
private boolean isAcceptedByMoodle(CredentialImplLoginPassword credentials) throws Exception {
    // NOTE(review): SECURITY - this disables host name verification for every
    // HttpsURLConnection in the JVM. Kept to preserve existing behaviour; it should be
    // replaced by trusting the Moodle server's certificate explicitly.
    HostnameVerifier hostnameVerifier = org.apache.http.conn.ssl.SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER;
    HttpsURLConnection.setDefaultHostnameVerifier(hostnameVerifier);

    Document document;
    DefaultHttpClient httpClientPost = new DefaultHttpClient();
    try {
        HttpPost httpPost = new HttpPost(moodleLoginURL);
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("username", credentials.getLogin()));
        nvps.add(new BasicNameValuePair("password", credentials.getPassword()));
        httpPost.setEntity(new UrlEncodedFormEntity(nvps));

        HttpResponse response1 = httpClientPost.execute(httpPost);
        InputStream inputStream = response1.getEntity().getContent();
        try {
            document = Jsoup.parse(inputStreamToString(inputStream));
        } finally {
            // Release the response stream even when reading or parsing fails.
            inputStream.close();
        }
    } finally {
        // Free the pooled connections held by this one-shot client.
        httpClientPost.getConnectionManager().shutdown();
    }

    Elements divElements = document.getElementsByClass("logininfo");
    if (divElements.size() == 0) {
        return false;
    }
    Elements aElements = divElements.get(0).getElementsByTag("a");
    if (aElements.size() == 0) {
        return false;
    }
    // The last link of the login-info block is expected to be the logout link.
    String logoutHref = aElements.get(aElements.size() - 1).attr("href");
    if (logoutHref.indexOf("logout") < 0) {
        return false;
    }

    DefaultHttpClient httpclient2 = new DefaultHttpClient();
    try {
        httpclient2.execute(new HttpGet(logoutHref));
    } finally {
        httpclient2.getConnectionManager().shutdown();
    }
    return true;
}
From source file:blackman.matt.board.Post.java
/** * Formats the HTML on the post text to accurately display it on the post. * * @param post The unformatted text of the post. * @return A formatted version of the post. */// ww w . java2 s . c om private String formatPostBody(String post) { Document formattedText = Jsoup.parse(post); Pattern p = Pattern.compile("^/.*/index\\.html"); // Red Text Elements redTexts = formattedText.getElementsByClass("heading"); for (Element text : redTexts) { text.wrap("<font color=\"#AF0A0F\"><strong></strong></font>"); } // Green text Elements greenTexts = formattedText.getElementsByClass("quote"); for (Element text : greenTexts) { text.wrap("<font color=\"#789922\"></font>"); } // Board Links Elements boardLinks = formattedText.select("a"); for (Element link : boardLinks) { String url = link.attr("href"); Matcher m = p.matcher(url); if (m.matches()) { link.attr("href", "http://8chan.co" + url); } } // Reply links Elements replyLinks = formattedText.select("a[onclick^=highlightReply"); for (Element reply : replyLinks) { repliedTo.add(reply.attr("href").split("#")[1]); boardLinks.attr("href", "http://8chan.co" + reply.attr("href")); } // Post too long text removal Elements tooLongs = formattedText.getElementsByClass("toolong"); for (Element text : tooLongs) { text.text(""); } return formattedText.toString(); }
From source file:accountgen.controller.Controller.java
/**
 * Fills the person's name from the first {@code <h3>} found in the elements with class
 * {@code address}.
 *
 * <p>The heading's HTML is split at its first space: the part before it becomes the first
 * name, everything after it the last name (empty for a single-word name). Both parts are
 * HTML-unescaped and trimmed. The middle name is always set to the empty string.
 *
 * @param doc parsed page containing the address block
 * @param p   person to update
 * @throws IllegalStateException if no such {@code <h3>} exists
 */
private void setName(Document doc, Person p) {
    Element name = doc.getElementsByClass("address").select("h3").first();
    if (name == null) {
        // first() yields null when nothing matched; fail with a message instead of an NPE.
        throw new IllegalStateException("No <h3> name heading found inside an element with class 'address'");
    }

    String fullName = name.html().trim();
    // Split on the first space with plain index arithmetic. The old code used the first
    // name as a regular expression to cut the string, which broke on regex metacharacters,
    // on single-word names and when the last name contained the first name.
    int firstSpace = fullName.indexOf(' ');
    String first = firstSpace < 0 ? fullName : fullName.substring(0, firstSpace);
    String rest = firstSpace < 0 ? "" : fullName.substring(firstSpace + 1);

    p.setFirstname(StringEscapeUtils.unescapeHtml4(first).trim());
    p.setMiddlename("");
    p.setLastname(StringEscapeUtils.unescapeHtml4(rest).trim());
}