Example usage for org.jsoup.parser Parser xmlParser

List of usage examples for org.jsoup.parser Parser xmlParser

Introduction

On this page you can find example usages of org.jsoup.parser.Parser.xmlParser().

Prototype

public static Parser xmlParser() 

Source Link

Document

Create a new XML parser.

Usage

From source file:org.structr.web.importer.Importer.java

/**
 * Parses the source held in {@code code} into {@code parsedDocument}. When {@code code} is
 * blank, the content is fetched from {@code address} via {@code HttpHelper.get} and parsed
 * as a complete HTML document, regardless of {@code fragment}.
 *
 * <p>In deployment mode ({@code isDeployment}) the source is handed to jsoup's XML parser, so
 * HTML void elements are rewritten to self-closing form first. Outside deployment mode jsoup's
 * HTML parser is used.
 *
 * @param fragment {@code true} to treat the source as a body fragment, {@code false} to parse
 *                 it as a complete document
 * @return always {@code true}
 * @throws FrameworkException not thrown directly here; presumably propagated from
 *                            {@code init()} or {@code HttpHelper.get(...)} - TODO confirm
 */
public boolean parse(final boolean fragment) throws FrameworkException {

    init();

    if (StringUtils.isNotBlank(code)) {

        if (!isDeployment) {
            logger.info("##### Start parsing code for page {} #####", new Object[] { name });
        } else {

            // Add a trailing slash to all void/self-closing tags so the XML parser can parse
            // them correctly. The lookahead makes sure the tag name really ends after the
            // alternative: without it "<colgroup>" would match "col" and be turned into
            // "<colgroup/>", and the same would happen to any element whose name merely
            // starts with a void tag name.
            code = code.replaceAll(
                    "<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(?=[\\s/>])([^>]*)>",
                    "<$1$2/>");
        }

        if (fragment) {

            if (isDeployment) {

                // XML flavour of a body-fragment parse: parse the nodes, then move them
                // into the body of an empty shell document.
                final List<Node> nodeList = Parser.parseXmlFragment(code, "");
                parsedDocument = Document.createShell("");
                final Element body = parsedDocument.body();

                // Work on an array snapshot; presumably the list changes while its nodes
                // are detached and re-parented.
                final Node[] nodes = nodeList.toArray(new Node[nodeList.size()]);

                // Detach from last to first; index 0 is not removed here and is only
                // re-parented by appendChild below.
                for (int i = nodes.length - 1; i > 0; i--) {
                    nodes[i].remove();
                }

                for (Node node : nodes) {
                    body.appendChild(node);
                }

            } else {

                parsedDocument = Jsoup.parseBodyFragment(code);
            }

        } else {

            if (isDeployment) {

                parsedDocument = Jsoup.parse(code, "", Parser.xmlParser());

            } else {

                parsedDocument = Jsoup.parse(code);
            }

        }

    } else {

        if (!isDeployment) {
            logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name });
        }

        // No local source available: download it and parse it as a full HTML page.
        code = HttpHelper.get(address);
        parsedDocument = Jsoup.parse(code);

    }

    return true;

}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Fetches the AniDB anime record for the id found in {@code options} and copies the
 * release date/year, titles, plot, ratings, poster URL and characters into a new
 * {@link MediaMetadata}.
 *
 * <p>The id is taken from {@code options.getResult()} when a result is present, otherwise
 * from {@code options.getId(providerInfo.getId())}. If no id is available, or the record
 * cannot be fetched or has no child elements, the metadata object is returned without
 * any of the scraped values.
 *
 * @param options scrape options carrying the language and the AniDB id
 * @return the metadata object, possibly without scraped values; never {@code null}
 * @throws Exception declared by the signature; failures while fetching the record are
 *                   caught and logged here
 */
@Override
public MediaMetadata getTvShowMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    if (StringUtils.isEmpty(id)) {
        return md;
    }

    trackConnections();

    // call API http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=4242
    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;
    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);

        // NOTE(review): the stream returned by getInputStream() is never closed in this
        // method - confirm whether CachedUrl or Jsoup takes care of that.
        doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
    } catch (Exception e) {
        // pass the exception itself so the cause and stack trace end up in the log
        LOGGER.error("failed to get TV show metadata: " + e.getMessage(), e);

        // clear cache
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return md;
    }

    md.setId(providerInfo.getId(), id);

    // the first child of the document is treated as the anime record
    Element anime = doc.child(0);

    for (Element e : anime.children()) {
        final String tag = e.tagName();

        if ("startdate".equalsIgnoreCase(tag)) {
            final String startDate = e.text();
            md.storeMetadata(MediaMetadata.RELEASE_DATE, startDate);
            try {
                Date date = org.tinymediamanager.scraper.util.StrgUtils.parseDate(startDate);
                md.storeMetadata(MediaMetadata.YEAR, new SimpleDateFormat("yyyy").format(date));
            } catch (Exception ex) {
                // best effort: the year is simply left unset, but no longer silently
                LOGGER.warn("could not derive year from start date '" + startDate + "': " + ex.getMessage());
            }
        } else if ("titles".equalsIgnoreCase(tag)) {
            parseTitle(md, langu, e);
        } else if ("description".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.PLOT, e.text());
        } else if ("ratings".equalsIgnoreCase(tag)) {
            getRating(md, e);
        } else if ("picture".equalsIgnoreCase(tag)) {
            md.storeMetadata(MediaMetadata.POSTER_URL, IMAGE_SERVER + e.text());
        } else if ("characters".equalsIgnoreCase(tag)) {
            getActors(md, e);
        }
    }

    // add static "Anime" genre
    md.addGenre(MediaGenres.ANIME);

    return md;
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Fetches the AniDB anime record for the id found in {@code options}, picks the episode
 * matching the requested season and episode number, and copies its title, plot, rating,
 * release date and runtime into a new {@link MediaMetadata}.
 *
 * <p>The title is looked up for the requested language first, then for {@code "en"}, then
 * for {@code "x-jat"}. The metadata object is returned without scraped values when the
 * id is missing, the season/episode numbers cannot be parsed, the record cannot be
 * fetched, or no matching episode exists.
 *
 * @param options scrape options carrying the language, the AniDB id and the season and
 *                episode numbers (read via {@code MediaMetadata.SEASON_NR} and
 *                {@code MediaMetadata.EPISODE_NR})
 * @return the metadata object, possibly without scraped values; never {@code null}
 * @throws Exception declared by the signature; failures while parsing the numbers or
 *                   fetching the record are caught and logged here
 */
@Override
public MediaMetadata getEpisodeMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());

    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    if (StringUtils.isEmpty(id)) {
        return md;
    }

    // get episode number and season number; -1 marks "not available"
    int seasonNr = -1;
    int episodeNr = -1;

    try {
        seasonNr = Integer.parseInt(options.getId(MediaMetadata.SEASON_NR));
        episodeNr = Integer.parseInt(options.getId(MediaMetadata.EPISODE_NR));
    } catch (Exception e) {
        // keep the cause in the log instead of dropping it
        LOGGER.warn("error parsing season/episode number", e);
    }

    if (seasonNr == -1 || episodeNr == -1) {
        return md;
    }

    trackConnections();

    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;
    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);

        // NOTE(review): the stream returned by getInputStream() is never closed in this
        // method - confirm whether CachedUrl or Jsoup takes care of that.
        doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
    } catch (Exception e) {
        // pass the exception itself so the cause and stack trace end up in the log
        LOGGER.error("failed to get episode metadata: " + e.getMessage(), e);

        // clear cache
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return md;
    }

    md.setId(providerInfo.getId(), id);

    List<Episode> episodes = parseEpisodes(doc);

    Episode episode = null;

    // find the requested episode
    for (Episode ep : episodes) {
        if (ep.season == seasonNr && ep.episode == episodeNr) {
            episode = ep;
            break;
        }
    }

    if (episode == null) {
        return md;
    }

    // title fallback chain: requested language -> "en" -> "x-jat"
    String title = episode.titles.get(langu);
    if (StringUtils.isBlank(title)) {
        title = episode.titles.get("en");
    }
    if (StringUtils.isBlank(title)) {
        title = episode.titles.get("x-jat");
    }
    md.storeMetadata(MediaMetadata.TITLE, title);
    md.storeMetadata(MediaMetadata.PLOT, episode.summary);
    md.storeMetadata(MediaMetadata.RATING, episode.rating);
    md.storeMetadata(MediaMetadata.RELEASE_DATE, episode.airdate);
    md.storeMetadata(MediaMetadata.RUNTIME, episode.runtime);

    // replaces the anime id stored above with the id of the episode itself
    md.setId(providerInfo.getId(), episode.id);

    return md;
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Fetches the AniDB anime record for the id found in {@code options} and converts every
 * episode returned by {@code parseEpisodes} into a {@link MediaEpisode}.
 *
 * <p>The episode title is looked up for the requested language first, then for
 * {@code "en"}, then for {@code "x-jat"}. An empty list is returned when the id is
 * missing or the record cannot be fetched or has no child elements.
 *
 * @param options scrape options carrying the language and the AniDB id
 * @return the converted episodes; empty when nothing could be fetched, never {@code null}
 * @throws Exception declared by the signature; failures while fetching the record are
 *                   caught and logged here
 */
@Override
public List<MediaEpisode> getEpisodeList(MediaScrapeOptions options) throws Exception {
    List<MediaEpisode> episodes = new ArrayList<MediaEpisode>();

    String id = "";
    String langu = options.getLanguage().name();

    // id from result
    if (options.getResult() != null) {
        id = options.getResult().getId();
    }

    // do we have an id from the options?
    if (StringUtils.isEmpty(id)) {
        id = options.getId(providerInfo.getId());
    }

    if (StringUtils.isEmpty(id)) {
        return episodes;
    }

    trackConnections();

    String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
            + id;
    Document doc = null;
    try {
        CachedUrl cachedUrl = new CachedUrl(url);

        // NOTE(review): the stream returned by getInputStream() is never closed in this
        // method - confirm whether CachedUrl or Jsoup takes care of that.
        doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
    } catch (Exception e) {
        // pass the exception itself so the cause and stack trace end up in the log
        LOGGER.error("error getting episode list: " + e.getMessage(), e);

        // clear cache
        CachedUrl.removeCachedFileForUrl(url);
    }

    if (doc == null || doc.children().size() == 0) {
        return episodes;
    }

    // convert every parsed episode into a MediaEpisode
    for (Episode ep : parseEpisodes(doc)) {
        MediaEpisode episode = new MediaEpisode(getProviderInfo().getId());
        episode.season = ep.season;
        episode.episode = ep.episode;

        // title fallback chain: requested language -> "en" -> "x-jat"
        String title = ep.titles.get(langu);
        if (StringUtils.isBlank(title)) {
            title = ep.titles.get("en");
        }
        if (StringUtils.isBlank(title)) {
            title = ep.titles.get("x-jat");
        }
        episode.title = title;

        episode.plot = ep.summary;
        episode.rating = ep.rating;
        episode.firstAired = ep.airdate;
        episode.ids.put(providerInfo.getId(), ep.id);

        // Previously the populated episode was dropped here, so the method always
        // returned an empty list.
        episodes.add(episode);
    }

    return episodes;
}