List of usage examples for org.jsoup.nodes.Document.child
public Element child(int index)
From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java
/**
 * Scrapes the metadata of a TV show from the AniDB HTTP API.
 *
 * <p>The id is taken from the search result inside {@code options} when one is present, otherwise
 * from the id stored in the options for this provider. When no id is available the metadata
 * object is returned without any scraped values; when the response cannot be fetched/parsed or
 * contains no elements it is returned before the id or any scraped value has been set on it.
 *
 * @param options the scrape options (language, optional search result, ids)
 * @return the metadata object of this provider, filled with the values found in the response
 * @throws Exception declared in the signature; a failure while fetching/parsing the response is
 *           caught and logged here and does not propagate
 */
@Override
public MediaMetadata getTvShowMetadata(MediaScrapeOptions options) throws Exception {
  MediaMetadata md = new MediaMetadata(providerInfo.getId());
  String id = "";
  String langu = options.getLanguage().name();

  // id from result
  if (options.getResult() != null) {
    id = options.getResult().getId();
  }

  // do we have an id from the options?
  if (StringUtils.isEmpty(id)) {
    id = options.getId(providerInfo.getId());
  }

  // nothing to look up without an id
  if (StringUtils.isEmpty(id)) {
    return md;
  }

  trackConnections();

  // call API http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=4242
  String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=" + id;
  Document doc = null;
  try {
    CachedUrl cachedUrl = new CachedUrl(url);
    doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
  }
  catch (Exception e) {
    // hand the exception itself to the logger so the stack trace is not lost
    LOGGER.error("failed to get TV show metadata: " + e.getMessage(), e);

    // clear cache
    CachedUrl.removeCachedFileForUrl(url);
  }

  // no document or no root element -> nothing to scrape
  if (doc == null || doc.children().size() == 0) {
    return md;
  }

  md.setId(providerInfo.getId(), id);

  Element anime = doc.child(0);
  for (Element child : anime.children()) {
    // a tag name can match at most one of the branches below
    String tag = child.tagName();

    if ("startdate".equalsIgnoreCase(tag)) {
      String startDate = child.text();
      md.storeMetadata(MediaMetadata.RELEASE_DATE, startDate);
      try {
        Date date = org.tinymediamanager.scraper.util.StrgUtils.parseDate(startDate);
        md.storeMetadata(MediaMetadata.YEAR, new SimpleDateFormat("yyyy").format(date));
      }
      catch (Exception ignored) {
        // best effort: the year simply stays unset when the start date cannot be parsed/formatted
      }
    }
    else if ("titles".equalsIgnoreCase(tag)) {
      parseTitle(md, langu, child);
    }
    else if ("description".equalsIgnoreCase(tag)) {
      md.storeMetadata(MediaMetadata.PLOT, child.text());
    }
    else if ("ratings".equalsIgnoreCase(tag)) {
      getRating(md, child);
    }
    else if ("picture".equalsIgnoreCase(tag)) {
      md.storeMetadata(MediaMetadata.POSTER_URL, IMAGE_SERVER + child.text());
    }
    else if ("characters".equalsIgnoreCase(tag)) {
      getActors(md, child);
    }
  }

  // add static "Anime" genre
  md.addGenre(MediaGenres.ANIME);

  return md;
}
From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java
private List<Episode> parseEpisodes(Document doc) { List<Episode> episodes = new ArrayList<Episode>(); Element anime = doc.child(0); Element eps = null;/*from ww w . ja va 2 s.c om*/ // find the "episodes" child for (Element e : anime.children()) { if ("episodes".equalsIgnoreCase(e.tagName())) { eps = e; break; } } if (eps == null) { return episodes; } for (Element e : eps.children()) { // filter out the desired episode if ("episode".equals(e.tagName())) { Episode episode = new Episode(); try { episode.id = Integer.parseInt(e.attr("id")); } catch (NumberFormatException ex) { } for (Element episodeInfo : e.children()) { if ("epno".equalsIgnoreCase(episodeInfo.tagName())) { try { episode.episode = Integer.parseInt(episodeInfo.text()); // looks like anidb is storing anything in a single season, so put 1 to season, if type = 1 if ("1".equals(episodeInfo.attr("type"))) { episode.season = 1; } else { // else - we see them as "specials" episode.season = 0; } } catch (NumberFormatException ex) { } continue; } if ("length".equalsIgnoreCase(episodeInfo.tagName())) { try { episode.runtime = Integer.parseInt(episodeInfo.text()); } catch (NumberFormatException ex) { } continue; } if ("airdate".equalsIgnoreCase(episodeInfo.tagName())) { episode.airdate = episodeInfo.text(); continue; } if ("rating".equalsIgnoreCase(episodeInfo.tagName())) { try { episode.rating = Float.parseFloat(episodeInfo.text()); } catch (NumberFormatException ex) { } continue; } if ("title".equalsIgnoreCase(episodeInfo.tagName())) { try { episode.titles.put(episodeInfo.attr("xml:lang").toLowerCase(), episodeInfo.text()); } catch (Exception ex) { } continue; } if ("summary".equalsIgnoreCase(episodeInfo.tagName())) { episode.summary = episodeInfo.text(); continue; } } episodes.add(episode); } } return episodes; }
From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java
/** * /*from w w w . ja va2s. co m*/ * @param iQid question ID * @return map of features and attributes: question title, body, category, best answer, date * @throws Exception */ public static Map<String, String> extractData(String iQid) throws Exception { Map<String, String> res = new LinkedHashMap<>(); res.put("qid", iQid); // parse date from qid res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString()); // get and mine html page String url = URL_PREFIX + iQid; HttpClient client = new HttpClient(); GetMethod method = new GetMethod(url); method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); try { int statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + method.getStatusLine()); } InputStream responseBody = method.getResponseBodyAsStream(); // strip top levels Document doc = Jsoup.parse(responseBody, "UTF8", url); Element html = doc.child(0); Element body = html.child(1); Element head = html.child(0); // get category res.put("Top level Category", findElementText(body, cc)); // get title res.put("Title", findElementText(head, ct)); // get body res.put("Body", findElementText(head, cb)); // get keywords res.put("Keywords", findElementText(head, ck)); // get best answer Element best_answer_div = html.select("div#ya-best-answer").first(); if (best_answer_div != null) { res.put("Best Answer", findElementText(best_answer_div, cba)); } responseBody.close(); } catch (HttpException e) { System.err.println("Fatal protocol violation: " + e.getMessage()); e.printStackTrace(); } catch (IOException e) { System.err.println("Fatal transport error: " + e.getMessage()); e.printStackTrace(); } finally { method.releaseConnection(); } return res; }