Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

This page collects example usages of org.jsoup.nodes.Document.select, taken from open-source projects.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Builds the list of reserved items from the rows matched by ".data tr".
 *
 * The first matched row is never converted (presumably a header row -- confirm
 * against the page markup). For every other row the title is the text of the
 * b/strong elements in the third cell; author, branch and status are taken from
 * the br-separated segments of the third and fourth cells when enough segments
 * exist.
 *
 * @param doc parsed page to read the rows from
 * @return the parsed reservations, or {@code null} when at most one row matches,
 *         or when a data row contains "keine Daten" or has exactly one child
 */
static List<ReservedItem> parse_reslist(Document doc) {
    Elements rows = doc.select(".data tr");
    int rowCount = rows.size();
    if (rowCount <= 1) {
        return null;
    }

    List<ReservedItem> reservations = new ArrayList<>();
    for (Element row : rows.subList(1, rowCount)) {
        boolean singleCell = row.children().size() == 1;
        if (singleCell || row.text().contains("keine Daten")) {
            return null;
        }

        ReservedItem item = new ReservedItem();
        item.setTitle(row.child(2).select("b, strong").text().trim());

        // Detail extraction is best effort: a failure here is printed and the
        // item is still added with whatever fields were set so far.
        try {
            String[] titleSegments = row.child(2).html().split("<br[ /]*>");
            String[] statusSegments = row.child(3).html().split("<br[ /]*>");

            if (titleSegments.length > 1) {
                item.setAuthor(titleSegments[1].trim());
            }
            if (statusSegments.length > 2) {
                item.setBranch(statusSegments[2].trim());
                item.setStatus(statusSegments[0].trim() + " (" + statusSegments[1].trim() + ")");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        reservations.add(item);
    }
    return reservations;
}

From source file:org.javiermoreno.torrentscratcher.Runner.java

/**
 * Downloads one page of the listing and collects the link target of every
 * "a.nombre" anchor on it.
 *
 * @param page page number substituted into the listing URL
 * @return the "href" attribute value of each matched anchor, in document order
 * @throws IOException if fetching the listing page fails
 */
public List<String> getRecordsUrl(int page) throws IOException {
    String listingUrl = "http://www.elitetorrent.net/categoria/13/peliculas-hdrip/modo:listado/orden:valoracion/pag:{page}"
            .replace("{page}", String.valueOf(page));
    Document listing = Jsoup.connect(listingUrl).get();

    List<String> hrefs = new ArrayList<>();
    for (Element anchor : listing.select("a.nombre")) {
        hrefs.add(anchor.attr("href"));
    }
    return hrefs;
}

From source file:com.gorsini.searcher.CanalplaySearcher.java

/**
 *
 * @param movieToSearch/*from  ww w.j  ava  2s .c o m*/
 * @return ArrayList<Movie> empty if no result
 */
public ArrayList<Movie> searchMovie(Movie movieToSearch) {
    try {
        /*
         curl "http://vod.canalplay.com/pages/recherche/challengeexplorer.aspx?action=4&search=hercule" -H "Referer: http://vod.canalplay.com/"
         ramne que les films dispo. 
         */
        String titleToSearch = movieToSearch.getTitle();
        LOG.log(Level.FINER, "titre  rechercher : {0}", titleToSearch);
        String url = makeURL(titleToSearch);
        Document doc = Jsoup.connect(url).referrer("http://vod.canalplay.com/").get();
        Elements movies = doc.select("div.list_movie");

        if (movies.isEmpty()) {
            LOG.log(Level.FINER, "no movie found with title {0}", titleToSearch);
            return null;
        } else {
            ArrayList<Movie> result = new ArrayList<Movie>();
            for (Element movie : movies) {
                MovieSelector selector = new MovieSelector();
                Movie movieFound = selector.selectMovie(movie, movieToSearch);
                if (movieFound != null) {
                    LOG.log(Level.FINER, "film trouv:{0}", movieFound.toString());
                    result.add(movieFound);
                }
            }
            return result;
        }
    } catch (Exception e) {
        System.out.println("problme HTTP");
        e.printStackTrace();
        return null;
    }
}

From source file:caarray.client.examples.java.CASRemoteEJBClient.java

/**
 * Posts the CAS login form, combining the configured credentials with the
 * hidden fields found on the previously fetched login page.
 *
 * The login page carries three hidden inputs ("lt", "execution", "_eventId"),
 * for example:
 * <pre>
 *   &lt;input type="hidden" name="lt" value="LT-38-TPi7eJ2w3UNbO2dK0qPphKUPGuZJna" /&gt;
 *   &lt;input type="hidden" name="execution" value="e1s1" /&gt;
 *   &lt;input type="hidden" name="_eventId" value="submit" /&gt;
 * </pre>
 * Their values are read from the page and sent back after the username and
 * password.
 *
 * @param casLoginPageResponse response that contains the CAS login page
 * @return the response to the login POST
 * @throws ClientProtocolException declared by the signature; presumably raised by the HTTP client on a protocol error
 * @throws IOException declared by the signature; presumably raised when reading the page or executing the request fails
 */
private HttpResponse submitCasLogin(HttpResponse casLoginPageResponse)
        throws ClientProtocolException, IOException {
    HttpPost loginPost = new HttpPost(BaseProperties.CAS_URL + "/login");
    Document loginPage = getContentDocument(casLoginPageResponse);

    List<NameValuePair> form = new ArrayList<NameValuePair>();
    form.add(new BasicNameValuePair("username", BaseProperties.CAS_USERNAME));
    form.add(new BasicNameValuePair("password", BaseProperties.CAS_PASSWORD));

    // Echo the hidden workflow fields back in the order lt, execution, _eventId.
    String[] hiddenFields = { "lt", "execution", "_eventId" };
    for (String field : hiddenFields) {
        String value = loginPage.select("input[name=" + field + "]").attr("value");
        form.add(new BasicNameValuePair(field, value));
    }

    loginPost.setEntity(new UrlEncodedFormEntity(form, HTTP.UTF_8));
    return httpclient.execute(loginPost);
}

From source file:hu.petabyte.redflags.engine.gear.archiver.Archiver.java

/**
 * Requests the DATA tab of the notice in every configured language and then
 * requests each further tab that the DATA page links to.
 *
 * The responses of the additional tab requests are not used here; presumably
 * the requests are made for a side effect of {@code requestNoticeTabQuietly}
 * (e.g. storing the page) -- confirm against that method.
 *
 * @param notice notice whose tabs are requested
 * @return the same notice instance, unchanged by this method
 * @throws Exception declared by the signature
 */
@Override
protected Notice processImpl(Notice notice) throws Exception {
    for (DisplayLanguage lang : langs) {

        // get data tab
        TedResponse r = ted.get().requestNoticeTabQuietly(notice.getId(), lang, Tab.DATA);
        if (null == r) {
            continue;
        }

        // get other tabs
        Document d = r.getParsedDocument();
        for (Tab tab : Tab.values()) {
            if (tab == Tab.DATA) {
                continue;
            }
            // The attribute selector must be closed with ']': an unterminated
            // "[..." is rejected by jsoup versions that validate bracket balance.
            String tabLinkQuery = "a[href~=tabId=" + tab.getId() + "]";
            if (!d.select(tabLinkQuery).isEmpty()) {
                LOG.trace("Found tab: {}:{}:{}, fetching", notice.getId(), lang, tab);
                ted.get().requestNoticeTabQuietly(notice.getId(), lang, tab);
            }
        }
    }
    return notice;
}

From source file:com.soulgalore.crawler.core.impl.AhrefPageURLParser.java

/**
 * Collects a CrawlerURL for every element matched by the query whose given
 * attribute is non-empty and does not start with MAIL_TO.
 *
 * @param query CSS query selecting the candidate elements
 * @param attributeKey name of the attribute that holds the link
 * @param doc document to search
 * @param url passed as second argument to each created CrawlerURL
 * @return the set of created URLs (empty when nothing qualifies)
 */
private Set<CrawlerURL> fetch(String query, String attributeKey, Document doc, String url) {

    final Set<CrawlerURL> found = new HashSet<CrawlerURL>();

    for (Element element : doc.select(query)) {
        final String link = element.attr(attributeKey);

        // skip elements without a link, and don't fetch mailto links
        final boolean skip = link.isEmpty() || link.startsWith(MAIL_TO);
        if (!skip) {
            // iframes and all other elements are added in exactly the same way
            found.add(new CrawlerURL(link, url));
        }
    }

    return found;

}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule6.java

/**
 * Extracts the editors from the first paragraph of the first ".authlist"
 * element, falling back to the first paragraph matched by
 * "h4:contains(Editors) ~ p".
 *
 * The paragraph markup is split on line breaks; each non-empty piece other
 * than "WHATWG:" and "W3C:" is parsed into a Person, and the links inside the
 * piece are attached as e-mail (href containing "@") or website.
 *
 * @param url not used by this rule
 * @param doc document to read the editors from
 * @return the parsed editors, or {@code null} when no paragraph is found or no
 *         person could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    // Primary location: first <p> of the first .authlist element.
    Elements paragraphs = null;
    Elements authLists = doc.select(".authlist");
    if (!authLists.isEmpty()) {
        paragraphs = authLists.get(0).select("p");
    }
    // Fallback location: paragraph following an "Editors" heading.
    if (paragraphs == null || paragraphs.isEmpty()) {
        paragraphs = doc.select("h4:contains(Editors) ~ p");
        if (paragraphs.isEmpty()) {
            return null;
        }
    }
    Element editor = paragraphs.get(0);

    String markup = editor.html();
    String[] pieces = markup.split("<br />");
    if (pieces.length < 2) {
        pieces = markup.split("<br clear=\"none\" />");
    }

    ArrayList<Person> people = new ArrayList<>();
    for (String piece : pieces) {
        if (piece.isEmpty() || piece.equals("WHATWG:") || piece.equals("W3C:")) {
            continue;
        }

        Document fragment = Jsoup.parse(piece.replaceAll("\n", ""));
        Person person = NameParser.parse(fragment.text());
        if (person == null) {
            continue;
        }

        for (Element link : fragment.select("a")) {
            String href = link.attr("href");
            if (href.isEmpty()) {
                continue;
            }
            if (href.contains("@")) {
                person.setEmail(href.replace("mailto:", ""));
            } else {
                person.addWebsite(href);
            }
        }

        people.add(person);
    }

    return people.isEmpty() ? null : people;
}

From source file:org.javiermoreno.torrentscratcher.Runner.java

/**
 * Looks the movie up on FilmAffinity and copies the FilmAffinity id, the
 * original title and the description onto it.
 *
 * If the search response has no "[property=og:title]" element it is treated as
 * a result list and the first result link is followed. When that list has no
 * result link, a warning is logged and the movie is returned untouched.
 * An IOException is logged (message only) and the movie is returned as is.
 *
 * @param movie movie to enrich; its title is used as the search text
 * @return the same movie instance
 */
public Movie enrichMovieWithFilmAffinity(Movie movie) {
    try {
        String encodedTitle = URLEncoder.encode(movie.getTitle(), "UTF8");
        String searchUrl = "http://www.filmaffinity.com/es/search.php?stext={title}&stype=all"
                .replace("{title}", encodedTitle);
        Document page = Jsoup.connect(searchUrl).get();

        boolean landedOnMoviePage = !page.select("[property=og:title]").isEmpty();
        if (!landedOnMoviePage) {
            // several results found, take the first
            Element firstHit = page.select(".item-search .mc-title a").first();
            if (firstHit == null) {
                // filmaffinity search engine failed
                log.warn("FilmAffinity 404: " + movie.getTitle());
                return movie;
            }
            page = Jsoup.connect("http://www.filmaffinity.com" + firstHit.attr("href")).get();
        }

        movie.setFilmAffinityId(page.select("div.rate-movie-box").attr("data-movie-id"));

        Elements infoValues = page.select("dl.movie-info").select("dd");

        // Strip "(...)" and "[...]" groups and a trailing "aka" from the first value.
        String rawTitle = infoValues.eq(0).text();
        String withoutParens = rawTitle.replaceAll("\\([^\\(]*\\)", "");
        String withoutBrackets = withoutParens.replaceAll("\\[[^\\(]*\\]", "");
        movie.setOriginalTitle(withoutBrackets.replaceAll("aka$", "").trim());

        movie.setDescription(infoValues.eq(11).text());
    } catch (IOException ex) {
        log.warn(ex.getMessage());
    }
    return movie;
}

From source file:net.pixomania.crawler.W3C.parser.rules.principalAuthors.PrincipalAuthorsRule1.java

/**
 * Extracts the principal authors from the dd elements matched by
 * "dt:contains(Principal Author) ~ dd".
 *
 * A dd that directly follows a dt whose text does not start with
 * "principal author" switches the rule into skip mode; skip mode ends after a
 * dd whose next sibling's text starts with "principal author". Entries seen in
 * skip mode are ignored. Every other entry is split on commas, and each piece
 * that is not a known note/reference phrase is parsed into a Person with its
 * links attached as e-mail (href containing "@") or website.
 *
 * @param url specification URL, used only in the warning message
 * @param doc document to read the authors from
 * @return the parsed authors, or {@code null} when nothing matches or no
 *         person could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    Elements entries = doc.select("dt:contains(Principal Author) ~ dd");
    if (entries.isEmpty()) {
        return null;
    }

    ArrayList<Person> authors = new ArrayList<>();
    boolean skipping = false;

    for (Element entry : entries) {
        Element before = entry.previousElementSibling();
        boolean followsOtherHeading = before.tagName().equals("dt")
                && !before.text().trim().toLowerCase().startsWith("principal author");
        if (followsOtherHeading) {
            skipping = true;
        }

        if (skipping) {
            // The current entry is always dropped; skipping only stops for the
            // entries after it when the next sibling is a principal-author heading.
            Element after = entry.nextElementSibling();
            if (after != null && after.text().trim().toLowerCase().startsWith("principal author")) {
                skipping = false;
            }
            continue;
        }

        for (String piece : entry.html().split(",")) {
            if (piece.isEmpty()) {
                continue;
            }

            String lowered = piece.toLowerCase();
            boolean refersElsewhere = lowered.startsWith("(in alphabetic")
                    || lowered.startsWith("see acknowl")
                    || lowered.startsWith("the w3")
                    || lowered.startsWith("(see ac")
                    || lowered.startsWith("see participants")
                    || lowered.contains("note:");
            if (refersElsewhere) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (piece.equals("WHATWG:") || piece.equals("W3C:")) {
                continue;
            }

            Document fragment = Jsoup.parse(piece.replaceAll("\n", ""));
            Person person = NameParser.parse(fragment.text());
            if (person == null) {
                continue;
            }

            for (Element link : fragment.select("a")) {
                String href = link.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    person.setEmail(href.replace("mailto:", ""));
                } else {
                    person.addWebsite(href);
                }
            }

            authors.add(person);
        }
    }

    return authors.isEmpty() ? null : authors;
}

From source file:zjut.soft.finalwork.fragment.PageFragment1.java

/**
 * Fetches the level-test result page on a background thread, parses the rows
 * of the "#DJKCJ" table into {@code levelTest} and {@code info}, and then
 * posts a UI update that shows {@code info} in {@code tv}.
 *
 * Any exception raised while fetching or parsing is printed and otherwise
 * ignored; in that case no UI update is posted.
 */
public void showLevelResult() {
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                HttpGet get = new HttpGet(((YCApplication) getActivity().getApplication()).get("selectedIp")
                        + Constant.levelQuery);

                YCApplication app = (YCApplication) getActivity().getApplicationContext();
                HttpResponse response = app.getClient().execute(get);
                HttpEntity entity = response.getEntity();

                // Read the whole body (line terminators are dropped, as before).
                // The reader is closed in finally so the response stream is
                // released even when reading fails.
                StringBuilder sb = new StringBuilder();
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(entity.getContent(), Constant.ENCODING));
                try {
                    String temp = null;
                    while ((temp = br.readLine()) != null) {
                        sb.append(temp);
                    }
                } finally {
                    br.close();
                }

                Document doc = Jsoup.parse(sb.toString());

                Elements tables = doc.select("#DJKCJ");
                if (tables.size() > 0) {
                    Element table = tables.get(0);
                    Elements trs = table.select("tr");
                    levelTest = new ArrayList<LevelTest>();
                    info = new StringBuilder();
                    // Row 0 is skipped (presumably the table header -- confirm).
                    for (int i = 1; i < trs.size(); i++) {
                        LevelTest test = new LevelTest();
                        Element tr = trs.get(i);
                        Elements tds = tr.select("td");
                        String name = tds.get(0).select("span").get(0).html();
                        String grade = tds.get(1).select("span").get(0).html();
                        String date = tds.get(2).select("span").get(0).html();
                        System.out.println(name + "," + grade + "," + date);
                        info.append(name + "," + grade + "," + date + "\n");
                        test.setName(name);
                        test.setGrade(grade);
                        test.setDate(date);
                        levelTest.add(test);
                    }
                }
                mHandler.post(new Runnable() {

                    @Override
                    public void run() {
                        // info is only assigned above when the table exists; guard
                        // against it never having been set.
                        if (info != null) {
                            tv.setText(info.toString());
                        }
                    }
                });
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }).start();
}