Example usage for org.jsoup.nodes Document getElementsByClass

List of usage examples for org.jsoup.nodes.Document#getElementsByClass

Introduction

On this page you can find example usages of org.jsoup.nodes.Document#getElementsByClass.

Prototype

public Elements getElementsByClass(String className) 

Source Link

Document

Find elements that have this class, including or under this element.

Usage

From source file:ie.nuim.cs.dri.metadata.WebSearch.java

/**
 *
 * @param xmlString/*  w w  w . j av  a 2  s.  c  om*/
 * @param title
 * @return
 * @throws Exception
 */
public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception {
    //System.out.println("CiteSeer search returned:\n"+xmlString);
    Document doc = Jsoup.parse(xmlString);
    ROS ros = new ROS();
    String articleTitle = "";
    int pubYear = 0;

    Elements resultElements = doc.getElementsByClass("result");

    for (Element result : resultElements) {
        Elements titleElement = result.getElementsByClass("doc_details");
        System.out.println(titleElement.text() + "\t" + title);
        if (!titleElement.text().equalsIgnoreCase(title)) {
            break;
        } else {
            ros.setArticleTitle(title);
            Elements authorElement = result.getElementsByClass("pubinfo");
            //authors=authorElement.text();
            Elements yearElement = result.getElementsByClass("pubyear");
            String yearStr = yearElement.text().replace(", ", "");
            int year = (yearStr.length() > 0 ? Integer.parseInt(yearStr) : 0);
            System.out.println("year:" + yearElement.text().replace(", ", ""));
            ros.setYear(year);
            Elements citeElement = result.getElementsByClass("citation");
            String[] citedBy = citeElement.text().split(" ");

            int citeby = (citedBy.length > 2 ? Integer.parseInt(citedBy[2]) : -1);
            ros.setCitedByCount(citeby);

            Elements publicationElement = result.getElementsByClass("pubvenue");
            String pub = publicationElement.text().replace("- ", "").toLowerCase();
            if (pub.contains("journal")) {
                ros.setPublicationType("Journal");
            } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) {
                ros.setPublicationType("Conference");
            } else {
                ros.setPublicationType("");
            }
            ros.setPublicationName(pub);

        }

    }
    // }

    return ros;

}

From source file:com.cognifide.aet.job.common.datafilters.extractelement.ExtractElementDataModifier.java

/**
 * Returns the combined outer HTML of every element in {@code document} that
 * carries the configured {@code elementClass}.
 *
 * @param document the parsed document to search
 * @return the outer HTML of all matching elements
 * @throws ProcessingException if no element with that class exists
 */
private String modifyDataForClassParam(Document document) throws ProcessingException {
    Elements matches = document.getElementsByClass(elementClass);
    if (matches.isEmpty()) {
        throw new ProcessingException("No element with class=" + elementClass + " found!");
    }
    return matches.outerHtml();
}

From source file:fr.arlefebvre.pronostics.controller.EuroMatchListController.java

/**
 * Returns the Euro 2016 fixtures scraped from the L'Equipe calendar page.
 *
 * <p>A non-empty {@code pseudoCache} is returned directly. Otherwise the page
 * is fetched and every row of each {@code mainDate} section's table body is
 * turned into a {@code Match}. Sections or rows that lack the expected
 * elements are skipped. If the download fails, the stack trace is printed and
 * whatever was collected so far (possibly nothing) is returned.
 *
 * @return the list of matches; empty when the page could not be read
 */
@RequestMapping("/euro2016/matches")
public List<Match> matches() {
    if (pseudoCache != null && !pseudoCache.isEmpty())
        return pseudoCache;
    ArrayList<Match> result = new ArrayList<Match>();
    String uri = "http://www.lequipe.fr/Football/Euro/Saison-2016/calendrier-resultats.html";

    // Connect to the site and load the HTML document.
    try {
        Document doc = Jsoup.connect(uri).get();

        for (Element element : doc.getElementsByClass("mainDate")) {
            Element title = element.getElementsByClass("title").first();
            Element tbody = element.getElementsByTag("tbody").first();
            if (title == null || tbody == null) {
                // first() yields null when nothing matched; skip incomplete sections.
                continue;
            }
            String date = title.text();

            for (Element matchElement : tbody.children()) {
                Element groupCell = matchElement.getElementsByClass("date").first();
                Element homeCell = matchElement.getElementsByClass("domicile").first();
                Element awayCell = matchElement.getElementsByClass("exterieur").first();
                if (groupCell == null || homeCell == null || awayCell == null) {
                    // Not a fixture row (for example a header or spacer row).
                    continue;
                }

                Match m = new Match();
                m.setDate(date);
                m.setHomeTeamId(homeCell.text());
                m.setAwayTeamId(awayCell.text());
                m.setGroup(groupCell.text());
                result.add(m);
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

    // Also replace an empty cache left behind by an earlier failed fetch;
    // checking only for null would leave that empty list in place forever.
    if (pseudoCache == null || !result.isEmpty())
        pseudoCache = result;
    return result;
}

From source file:com.abixen.platform.core.service.impl.LayoutServiceImpl.java

/**
 * Converts an HTML layout into a JSON string of the form {@code {"rows":[...]}}.
 *
 * <p>Each element with class {@code row} becomes one {@code LayoutRowUtil}.
 * The row's markup is re-parsed and every element with class {@code column}
 * inside it becomes a {@code LayoutColumnUtil} built from the element's
 * {@code class} attribute with everything up to and including the first space
 * removed (the whole attribute when it contains no space).
 *
 * @param htmlString the HTML layout markup
 * @return the JSON representation of the rows and their columns
 */
@Override
public String htmlLayoutToJson(String htmlString) {

    log.debug("htmlLayoutToJson() - htmlString: " + htmlString);

    List<LayoutRowUtil> rows = new ArrayList<>();

    for (Element rowElement : Jsoup.parse(htmlString).getElementsByClass("row")) {
        // The row is parsed again as a standalone document before looking for columns.
        Elements columnElements = Jsoup.parse(rowElement.toString()).getElementsByClass("column");

        List<LayoutColumnUtil> columns = new ArrayList<>();
        for (Element columnElement : columnElements) {
            String classAttribute = columnElement.attr("class");
            int firstSpace = classAttribute.indexOf(" ");
            // With no space, indexOf gives -1 and the whole attribute value is kept.
            columns.add(new LayoutColumnUtil(classAttribute.substring(firstSpace + 1)));
        }

        rows.add(new LayoutRowUtil(columns));
    }

    String rowsJson = new Gson().toJson(rows);
    return "{\"rows\":" + rowsJson + "}";
}

From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java

/**
 * Returns the UEFA national teams scraped from the FIFA ranking page, sorted by name.
 *
 * <p>A non-empty {@code pseudoCache} is returned directly. Otherwise the page
 * is fetched and every row of each {@code table} element's body that has both
 * a {@code tbl-teamname} and a {@code tbl-countrycode} cell is turned into a
 * {@code Team}. If the download fails, the stack trace is printed and whatever
 * was collected so far (possibly nothing) is returned.
 *
 * @return the list of teams ordered by name; empty when the page could not be read
 */
@RequestMapping("/uefa/teams")
public List<Team> teams() {
    if (pseudoCache != null && !pseudoCache.isEmpty())
        return pseudoCache;
    ArrayList<Team> result = new ArrayList<Team>();
    String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html";

    // Connect to the site and load the HTML document.
    try {
        Document doc = Jsoup.connect(uri).get();
        for (Element element : doc.getElementsByClass("table")) {
            Element tbody = element.getElementsByTag("tbody").first();
            if (tbody == null) {
                // first() yields null when nothing matched; skip tables without a body.
                continue;
            }
            for (Element child : tbody.children()) {
                Element teamNameElement = child.getElementsByClass("tbl-teamname").first();
                Element countryCodeElement = child.getElementsByClass("tbl-countrycode").first();
                if (teamNameElement == null || countryCodeElement == null) {
                    // Not a team row.
                    continue;
                }
                Element img = teamNameElement.select("img").first();

                Team team = new Team();
                team.setName(teamNameElement.text());
                team.setCountryCode(countryCodeElement.text());
                // A row without a flag image gets an empty URL instead of failing.
                team.setImgUrl(img != null ? img.absUrl("src") : "");
                team.setNationalTeam(true);
                result.add(team);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    result.sort((t1, t2) -> t1.getName().compareTo(t2.getName()));

    // Also replace an empty cache left behind by an earlier failed fetch;
    // checking only for null would leave that empty list in place forever.
    if (pseudoCache == null || !result.isEmpty())
        pseudoCache = result;
    return result;
}

From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java

/**
 * Collects key/value properties from the elements of {@code htmlDoc} that
 * carry the class {@code kb1}.
 *
 * <p>Each {@code kb1} element supplies a key (its cleaned text, minus one
 * trailing colon). The value is the cleaned text of the element's next
 * sibling, provided that sibling exists and is not itself a {@code kb1} key
 * element. Pairs with an empty key or an empty value are dropped.
 *
 * @param htmlDoc the parsed document to read the properties from
 * @return an immutable map of the collected properties
 */
private ImmutableMap<String, String> druckSachenProperties(Document htmlDoc) {

    ImmutableMap.Builder<String, String> mapBuilder = ImmutableMap.builder();
    Elements keyElements = htmlDoc.getElementsByClass("kb1"); // td elements
    for (Element element : keyElements) {
        String key = removeNonBreakingSpacesAndTrim(element.text());
        if (key.endsWith(":")) {
            key = key.substring(0, key.length() - 1);
        }

        Element valueElement = element.nextElementSibling();
        // "kb1" is a CSS class, not an attribute name, so the sibling has to be
        // tested with hasClass; hasAttr("kb1") could never detect a key cell.
        if (valueElement != null && !valueElement.hasClass("kb1")) {
            String value = removeNonBreakingSpacesAndTrim(valueElement.text());

            // NOTE(review): if mapBuilder is Guava's ImmutableMap.Builder, a repeated
            // key will make build() fail - confirm keys are unique in the source pages.
            if ((!key.isEmpty()) && (!value.isEmpty())) {
                mapBuilder.put(key, value);
            }
        }
    }
    return mapBuilder.build();
}

From source file:eu.masconsult.bgbanking.banks.dskbank.DskClient.java

/**
 * Reports whether the page contains a "Log Out" entry inside one of its
 * {@code supplemental_links} elements.
 *
 * @param doc the parsed page to inspect
 * @return {@code true} if any {@code supplemental_links} element has content
 *         matching the text "Log Out", {@code false} otherwise
 * @throws ParseException if the page has no {@code supplemental_links} element at all
 */
private boolean checkLoggedIn(Document doc) {
    Elements supplementalLinks = doc.getElementsByClass("supplemental_links");
    if (supplementalLinks == null || supplementalLinks.isEmpty()) {
        throw new ParseException("getBankAccounts: can't find .supplemental_links");
    }

    for (Element linkContainer : supplementalLinks) {
        Elements logOutMatches = linkContainer.getElementsContainingText("Log Out");
        boolean hasLogOut = logOutMatches != null && !logOutMatches.isEmpty();
        if (hasLogOut) {
            return true;
        }
    }
    return false;
}

From source file:es.logongas.util.seguridad.AuthenticationProviderImplMoodle.java

/**
 * Authenticates a login/password credential and returns the matching identity.
 *
 * <p>Credentials of any other type are rejected. The configured administrator
 * login is checked against the stored encrypted administrator password; every
 * other login is verified by posting it to the Moodle login page. On success
 * the identity is loaded by its natural key (the login).
 *
 * @param credential the credential to verify
 * @return the identity for the login, or {@code null} when the credential is of
 *         an unsupported type, is rejected, or a {@code BusinessException}
 *         occurs
 * @throws RuntimeException wrapping any other failure (for example a network error)
 */
@Override
public Principal authenticate(Credential credential) {
    try {
        if (!(credential instanceof CredentialImplLoginPassword)) {
            return null;
        }

        CredentialImplLoginPassword credentialImplLoginPassword = (CredentialImplLoginPassword) credential;

        if (loginAdmin.equalsIgnoreCase(credentialImplLoginPassword.getLogin())) {
            StrongPasswordEncryptor passwordEncryptor = new StrongPasswordEncryptor();
            if (!passwordEncryptor.checkPassword(credentialImplLoginPassword.getPassword(), passwordAdmin)) {
                return null;
            }
        } else if (!isAcceptedByMoodle(credentialImplLoginPassword)) {
            return null;
        }

        GenericDAO<Identity, Integer> genericDAO = daoFactory.getDAO(Identity.class);
        Identity identity = genericDAO.readByNaturalKey(credentialImplLoginPassword.getLogin());

        return identity;

    } catch (BusinessException ex) {
        return null;
    } catch (Exception ex) {
        log.info("Fallo al conectarse al moodle", ex);
        throw new RuntimeException(ex);
    }
}

/**
 * Posts the login and password to the Moodle login URL and inspects the
 * response: the login counts as accepted when the last link inside the first
 * {@code logininfo} element points at a URL containing "logout". That URL is
 * then requested before returning.
 *
 * <p>Both HTTP clients and the response stream are released in {@code finally}
 * blocks so connections are not leaked on early returns or failures.
 */
private boolean isAcceptedByMoodle(CredentialImplLoginPassword credentialImplLoginPassword) throws Exception {
    // SECURITY NOTE(review): this installs an allow-all hostname verifier as the
    // process-wide default for HttpsURLConnection, which disables host name
    // checking for every such connection in the JVM. Left unchanged to preserve
    // behavior; confirm whether it is still required.
    HostnameVerifier hostnameVerifier = org.apache.http.conn.ssl.SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER;
    HttpsURLConnection.setDefaultHostnameVerifier(hostnameVerifier);

    DefaultHttpClient httpClientPost = new DefaultHttpClient();
    try {
        HttpPost httpPost = new HttpPost(moodleLoginURL);
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("username", credentialImplLoginPassword.getLogin()));
        nvps.add(new BasicNameValuePair("password", credentialImplLoginPassword.getPassword()));
        httpPost.setEntity(new UrlEncodedFormEntity(nvps));
        HttpResponse response1 = httpClientPost.execute(httpPost);

        Document document;
        InputStream inputStream = response1.getEntity().getContent();
        try {
            document = Jsoup.parse(inputStreamToString(inputStream));
        } finally {
            inputStream.close();
        }

        Elements divElements = document.getElementsByClass("logininfo");
        if (divElements.size() == 0) {
            return false;
        }
        Elements aElements = divElements.get(0).getElementsByTag("a");
        if (aElements.size() == 0) {
            return false;
        }
        // The last link of the login-info block is expected to be the logout link.
        String logoutUrl = aElements.get(aElements.size() - 1).attr("href");
        if (!logoutUrl.contains("logout")) {
            return false;
        }

        // NOTE(review): the logout request goes through a fresh client that does
        // not share state with the login client - confirm this actually ends the
        // session that was just opened.
        DefaultHttpClient httpclient2 = new DefaultHttpClient();
        try {
            httpclient2.execute(new HttpGet(logoutUrl));
        } finally {
            httpclient2.getConnectionManager().shutdown();
        }
        return true;
    } finally {
        httpClientPost.getConnectionManager().shutdown();
    }
}

From source file:blackman.matt.board.Post.java

/**
 * Formats the HTML on the post text to accurately display it on the post.
 *
 * <p>Elements with class {@code heading} are wrapped in bold red text and
 * elements with class {@code quote} in green text. Links whose href is a
 * site-relative board index are made absolute. Reply links (anchors whose
 * {@code onclick} starts with {@code highlightReply}) are made absolute and the
 * fragment after {@code #} in their href is recorded in {@code repliedTo}.
 * The text of elements with class {@code toolong} is cleared.
 *
 * @param post The unformatted text of the post.
 * @return A formatted version of the post.
 */
private String formatPostBody(String post) {
    Document formattedText = Jsoup.parse(post);
    Pattern p = Pattern.compile("^/.*/index\\.html");

    // Red Text
    Elements redTexts = formattedText.getElementsByClass("heading");
    for (Element text : redTexts) {
        text.wrap("<font color=\"#AF0A0F\"><strong></strong></font>");
    }

    // Green text
    Elements greenTexts = formattedText.getElementsByClass("quote");
    for (Element text : greenTexts) {
        text.wrap("<font color=\"#789922\"></font>");
    }

    // Board Links
    Elements boardLinks = formattedText.select("a");
    for (Element link : boardLinks) {
        String url = link.attr("href");
        Matcher m = p.matcher(url);
        if (m.matches()) {
            link.attr("href", "http://8chan.co" + url);
        }
    }

    // Reply links (the attribute selector needs its closing bracket)
    Elements replyLinks = formattedText.select("a[onclick^=highlightReply]");
    for (Element reply : replyLinks) {
        String href = reply.attr("href");
        String[] parts = href.split("#");
        // Only record a target when the href actually carries a fragment.
        if (parts.length > 1) {
            repliedTo.add(parts[1]);
        }
        // Rewrite this reply link only; calling attr on the boardLinks collection
        // would overwrite the href of every anchor in the post.
        reply.attr("href", "http://8chan.co" + href);
    }

    // Post too long text removal
    Elements tooLongs = formattedText.getElementsByClass("toolong");
    for (Element text : tooLongs) {
        text.text("");
    }

    return formattedText.toString();
}

From source file:accountgen.controller.Controller.java

/**
 * Fills the first, middle and last name of {@code p} from the first
 * {@code <h3>} found under the elements with class {@code address}.
 *
 * <p>The heading's inner HTML is split at its first space: the part before it
 * becomes the first name and everything after it the last name (empty when the
 * heading is a single word). HTML entities are unescaped in both parts and the
 * middle name is always set to the empty string.
 *
 * @param doc the parsed document containing the address block
 * @param p the person whose name fields are set
 * @throws IllegalStateException if no {@code <h3>} exists under an
 *         {@code address} element
 */
private void setName(Document doc, Person p) {
    Element name = doc.getElementsByClass("address").select("h3").first();
    if (name == null) {
        throw new IllegalStateException("No <h3> element found under class \"address\"");
    }

    String fullName = name.html().trim();
    // Split by position, not by using the first name as a regex: a regex split
    // breaks when the first name recurs in the surname ("John Johnson"), when
    // it contains regex metacharacters, or when there is only one word.
    int firstSpace = fullName.indexOf(' ');
    String first = firstSpace < 0 ? fullName : fullName.substring(0, firstSpace);
    String last = firstSpace < 0 ? "" : fullName.substring(firstSpace + 1);

    p.setFirstname(StringEscapeUtils.unescapeHtml4(first).trim());
    p.setMiddlename("");
    p.setLastname(StringEscapeUtils.unescapeHtml4(last).trim());
}