List of usage examples for org.jsoup.parser Parser xmlParser
public static Parser xmlParser()
From source file:org.structr.web.importer.Importer.java
/** * Parse the code previously read by {@link Importer#readPage()} and treat it as page fragment. * * @param fragment//from w ww .j a v a2 s .c o m * @return * @throws FrameworkException */ public boolean parse(final boolean fragment) throws FrameworkException { init(); if (StringUtils.isNotBlank(code)) { if (!isDeployment) { logger.info("##### Start parsing code for page {} #####", new Object[] { name }); } else { // a trailing slash to all void/self-closing tags so the XML parser can parse it correctly code = code.replaceAll( "<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)([^>]*)>", "<$1$2/>"); } if (fragment) { if (isDeployment) { final List<Node> nodeList = Parser.parseXmlFragment(code, ""); parsedDocument = Document.createShell(""); final Element body = parsedDocument.body(); final Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } } else { parsedDocument = Jsoup.parseBodyFragment(code); } } else { if (isDeployment) { parsedDocument = Jsoup.parse(code, "", Parser.xmlParser()); } else { parsedDocument = Jsoup.parse(code); } } } else { if (!isDeployment) { logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name }); } code = HttpHelper.get(address); parsedDocument = Jsoup.parse(code); } return true; }
From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java
/**
 * Loads the AniDB "anime" XML document for the requested show and maps its top-level elements
 * (start date, titles, description, ratings, picture, characters) onto a {@link MediaMetadata}.
 *
 * <p>The id is taken from the search result in the options if present, otherwise from the ids
 * stored in the options. If no id is available, or the document cannot be loaded or is empty,
 * the metadata object is returned without any scraped values.
 *
 * @param options scrape options providing the language, an optional search result and ids
 * @return the metadata object; only filled when the document could be loaded
 * @throws Exception declared by the overridden signature; failures while loading the document
 *                   are caught and logged inside this method
 */
@Override
public MediaMetadata getTvShowMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    if (StringUtils.isEmpty(id)) {
        return md;
    }

    trackConnections();

    // call API http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=4242
    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;

    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);
        doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
    } catch (Exception e) {
        // hand the exception to the logger as well so the stack trace is not lost
        LOGGER.error("failed to get TV show metadata: " + e.getMessage(), e);
        // clear cache
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return md;
    }

    md.setId(providerInfo.getId(), id);

    Element anime = doc.child(0);

    for (Element e : anime.children()) {
        // an element has exactly one tag name, so at most one branch applies
        String tag = e.tagName();

        if ("startdate".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.RELEASE_DATE, e.text());
            try {
                Date date = org.tinymediamanager.scraper.util.StrgUtils.parseDate(e.text());
                md.storeMetadata(MediaMetadata.YEAR, new SimpleDateFormat("yyyy").format(date));
            } catch (Exception ex) {
                // the year is best effort: keep the raw release date, but leave a trace
                // instead of swallowing the failure silently
                LOGGER.warn("could not derive year from start date '" + e.text() + "': " + ex.getMessage());
            }
        } else if ("titles".equalsIgnoreCase(tag)) {
            parseTitle(md, langu, e);
        } else if ("description".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.PLOT, e.text());
        } else if ("ratings".equalsIgnoreCase(tag)) {
            getRating(md, e);
        } else if ("picture".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.POSTER_URL, IMAGE_SERVER + e.text());
        } else if ("characters".equalsIgnoreCase(tag)) {
            getActors(md, e);
        }
    }

    // add static "Anime" genre
    md.addGenre(MediaGenres.ANIME);

    return md;
}
From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java
/**
 * Loads the AniDB "anime" XML document for the requested show, picks the episode matching the
 * season/episode numbers from the options and maps it onto a {@link MediaMetadata}.
 *
 * <p>The metadata object is returned without scraped values when no id is available, when the
 * season or episode number cannot be parsed, when the document cannot be loaded or is empty, or
 * when no matching episode is found. The title is looked up for the requested language first,
 * then for {@code "en"}, then for {@code "x-jat"}.
 *
 * @param options scrape options providing the language, an optional search result and the ids
 *                (including season and episode number)
 * @return the metadata object; only filled when a matching episode was found
 * @throws Exception declared by the overridden signature; failures while loading the document
 *                   are caught and logged inside this method
 */
@Override
public MediaMetadata getEpisodeMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    if (StringUtils.isEmpty(id)) {
        return md;
    }

    // get episode number and season number
    int seasonNr = -1;
    int episodeNr = -1;

    try {
        seasonNr = Integer.parseInt(options.getId(MediaMetadata.SEASON_NR));
        episodeNr = Integer.parseInt(options.getId(MediaMetadata.EPISODE_NR));
    } catch (Exception e) {
        // include the reason so a missing or malformed number can be diagnosed from the log
        LOGGER.warn("error parsing season/episode number: " + e.getMessage());
    }

    // both numbers are required to identify the episode
    if (seasonNr == -1 || episodeNr == -1) {
        return md;
    }

    trackConnections();

    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;

    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);
        doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
    } catch (Exception e) {
        // hand the exception to the logger as well so the stack trace is not lost
        LOGGER.error("failed to get episode metadata: " + e.getMessage(), e);
        // clear cache
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return md;
    }

    md.setId(providerInfo.getId(), id);

    List<Episode> episodes = parseEpisodes(doc);

    Episode episode = null;

    // filter out the episode
    for (Episode ep : episodes) {
        if (ep.season == seasonNr && ep.episode == episodeNr) {
            episode = ep;
            break;
        }
    }

    if (episode == null) {
        return md;
    }

    // title fallback chain: requested language -> "en" -> "x-jat"
    String title = episode.titles.get(langu);
    if (StringUtils.isBlank(title)) {
        title = episode.titles.get("en");
    }
    if (StringUtils.isBlank(title)) {
        title = episode.titles.get("x-jat");
    }

    md.storeMetadata(MediaMetadata.TITLE, title);
    md.storeMetadata(MediaMetadata.PLOT, episode.summary);
    md.storeMetadata(MediaMetadata.RATING, episode.rating);
    md.storeMetadata(MediaMetadata.RELEASE_DATE, episode.airdate);
    md.storeMetadata(MediaMetadata.RUNTIME, episode.runtime);

    // replaces the show id stored above with the id of the episode
    md.setId(providerInfo.getId(), episode.id);

    return md;
}
From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java
@Override public List<MediaEpisode> getEpisodeList(MediaScrapeOptions options) throws Exception { List<MediaEpisode> episodes = new ArrayList<MediaEpisode>(); String id = ""; String langu = options.getLanguage().name(); // id from result if (options.getResult() != null) { id = options.getResult().getId(); }//w w w . j a v a2s . com // do we have an id from the options? if (StringUtils.isEmpty(id)) { id = options.getId(providerInfo.getId()); } if (StringUtils.isEmpty(id)) { return episodes; } trackConnections(); String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=" + id; Document doc = null; try { CachedUrl cachedUrl = new CachedUrl(url); doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser()); } catch (Exception e) { LOGGER.error("error getting episode list: " + e.getMessage()); // clear cache CachedUrl.removeCachedFileForUrl(url); } if (doc == null || doc.children().size() == 0) { return episodes; } // filter out the episode for (Episode ep : parseEpisodes(doc)) { MediaEpisode episode = new MediaEpisode(getProviderInfo().getId()); episode.title = ep.titles.get(langu); episode.season = ep.season; episode.episode = ep.episode; if (StringUtils.isBlank(episode.title)) { episode.title = ep.titles.get("en"); } if (StringUtils.isBlank(episode.title)) { episode.title = ep.titles.get("x-jat"); } episode.plot = ep.summary; episode.rating = ep.rating; episode.firstAired = ep.airdate; episode.ids.put(providerInfo.getId(), ep.id); } return episodes; }