Example usage for org.jsoup.nodes Document child

List of usage examples for org.jsoup.nodes Document child

Introduction

On this page you can find example usages of the org.jsoup.nodes Document method child.

Prototype

public Element child(int index) 

Source Link

Document

Get a child element of this element, by its 0-based index number.

Usage

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Scrapes the metadata of a single anime from the AniDB HTTP API.
 *
 * <p>The anime id is taken from the previous result stored in {@code options} if there is one,
 * otherwise from the id registered in {@code options} for this provider. When no id is available,
 * or the API response cannot be downloaded/parsed or has no root element, the metadata object is
 * returned without any scraped values.
 *
 * @param options the scrape options (language, optional previous result, known ids)
 * @return the metadata object for this provider; never {@code null}
 * @throws Exception propagated from the helpers called here; download and parse failures are
 *                   caught and logged instead of being rethrown
 */
@Override
public MediaMetadata getTvShowMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    // without an id there is nothing to look up
    if (StringUtils.isEmpty(id)) {
        return md;
    }

    trackConnections();

    // call API http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=4242
    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;
    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);

        java.io.InputStream in = cachedUrl.getInputStream();
        try {
            doc = Jsoup.parse(in, "UTF-8", "", Parser.xmlParser());
        } finally {
            // release the stream even if parsing fails; closing twice is harmless
            if (in != null) {
                in.close();
            }
        }
    } catch (Exception e) {
        // pass the exception along so the stack trace is not lost
        LOGGER.error("failed to get TV show metadata: " + e.getMessage(), e);

        // clear cache so a broken response is not served again
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return md;
    }

    md.setId(providerInfo.getId(), id);

    // the first child of the document is treated as the anime root element
    Element anime = doc.child(0);

    for (Element child : anime.children()) {
        String tag = child.tagName();

        if ("startdate".equalsIgnoreCase(tag)) {
            String startDate = child.text();
            md.storeMetadata(MediaMetadata.RELEASE_DATE, startDate);
            try {
                Date date = org.tinymediamanager.scraper.util.StrgUtils.parseDate(startDate);
                md.storeMetadata(MediaMetadata.YEAR, new SimpleDateFormat("yyyy").format(date));
            } catch (Exception ignored) {
                // best effort: an unparseable start date simply leaves the year unset
            }
        } else if ("titles".equalsIgnoreCase(tag)) {
            parseTitle(md, langu, child);
        } else if ("description".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.PLOT, child.text());
        } else if ("ratings".equalsIgnoreCase(tag)) {
            getRating(md, child);
        } else if ("picture".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.POSTER_URL, IMAGE_SERVER + child.text());
        } else if ("characters".equalsIgnoreCase(tag)) {
            getActors(md, child);
        }
    }

    // add static "Anime" genre
    md.addGenre(MediaGenres.ANIME);

    return md;
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Collects the episodes listed in an anime document.
 *
 * <p>The first child of {@code doc} is treated as the anime root; its {@code episodes} child is
 * searched for {@code episode} elements. Values that cannot be parsed as numbers are skipped, so
 * the corresponding {@link Episode} field keeps its initial value.
 *
 * @param doc the parsed document; may be {@code null} or empty
 * @return the parsed episodes; empty if the document is {@code null}, has no root element or has
 *         no {@code episodes} element
 */
private List<Episode> parseEpisodes(Document doc) {
    List<Episode> episodes = new ArrayList<Episode>();

    // doc.child(0) throws on a document without children, so bail out early
    if (doc == null || doc.children().size() == 0) {
        return episodes;
    }

    Element anime = doc.child(0);
    Element eps = null;
    // find the "episodes" child
    for (Element candidate : anime.children()) {
        if ("episodes".equalsIgnoreCase(candidate.tagName())) {
            eps = candidate;
            break;
        }
    }

    if (eps == null) {
        return episodes;
    }

    for (Element e : eps.children()) {
        // only "episode" elements are of interest (exact, case-sensitive match)
        if (!"episode".equals(e.tagName())) {
            continue;
        }

        Episode episode = new Episode();
        try {
            episode.id = Integer.parseInt(e.attr("id"));
        } catch (NumberFormatException ignored) {
            // non-numeric id: keep the initial value
        }

        for (Element episodeInfo : e.children()) {
            String tag = episodeInfo.tagName();

            if ("epno".equalsIgnoreCase(tag)) {
                try {
                    episode.episode = Integer.parseInt(episodeInfo.text());

                    // looks like anidb is storing anything in a single season, so put 1 to season, if type = 1
                    if ("1".equals(episodeInfo.attr("type"))) {
                        episode.season = 1;
                    } else {
                        // else - we see them as "specials"
                        episode.season = 0;
                    }
                } catch (NumberFormatException ignored) {
                    // non-numeric episode number: neither episode nor season is set
                }
            } else if ("length".equalsIgnoreCase(tag)) {
                try {
                    episode.runtime = Integer.parseInt(episodeInfo.text());
                } catch (NumberFormatException ignored) {
                    // non-numeric length: keep the initial value
                }
            } else if ("airdate".equalsIgnoreCase(tag)) {
                episode.airdate = episodeInfo.text();
            } else if ("rating".equalsIgnoreCase(tag)) {
                try {
                    episode.rating = Float.parseFloat(episodeInfo.text());
                } catch (NumberFormatException ignored) {
                    // non-numeric rating: keep the initial value
                }
            } else if ("title".equalsIgnoreCase(tag)) {
                try {
                    // titles are keyed by their lower-cased xml:lang attribute
                    episode.titles.put(episodeInfo.attr("xml:lang").toLowerCase(), episodeInfo.text());
                } catch (Exception ignored) {
                    // best effort: a title that cannot be stored is skipped
                }
            } else if ("summary".equalsIgnoreCase(tag)) {
                episode.summary = episodeInfo.text();
            }
        }
        episodes.add(episode);
    }

    return episodes;
}

From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java

/**
 * /*from   w w  w  .  ja va2s.  co m*/
 * @param iQid question ID
 * @return map of features and attributes: question title, body, category, best answer, date
 * @throws Exception
 */
public static Map<String, String> extractData(String iQid) throws Exception {

    Map<String, String> res = new LinkedHashMap<>();
    res.put("qid", iQid);

    // parse date from qid
    res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString());

    // get and mine html page
    String url = URL_PREFIX + iQid;
    HttpClient client = new HttpClient();
    GetMethod method = new GetMethod(url);
    method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
            new DefaultHttpMethodRetryHandler(3, false));
    try {
        int statusCode = client.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            System.err.println("Method failed: " + method.getStatusLine());
        }
        InputStream responseBody = method.getResponseBodyAsStream();

        // strip top levels
        Document doc = Jsoup.parse(responseBody, "UTF8", url);
        Element html = doc.child(0);

        Element body = html.child(1);
        Element head = html.child(0);

        // get category
        res.put("Top level Category", findElementText(body, cc));

        // get title
        res.put("Title", findElementText(head, ct));

        // get body
        res.put("Body", findElementText(head, cb));

        // get keywords
        res.put("Keywords", findElementText(head, ck));

        // get best answer
        Element best_answer_div = html.select("div#ya-best-answer").first();
        if (best_answer_div != null) {
            res.put("Best Answer", findElementText(best_answer_div, cba));
        }

        responseBody.close();

    } catch (HttpException e) {
        System.err.println("Fatal protocol violation: " + e.getMessage());
        e.printStackTrace();
    } catch (IOException e) {
        System.err.println("Fatal transport error: " + e.getMessage());
        e.printStackTrace();
    } finally {
        method.releaseConnection();
    }

    return res;
}