org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java Source code

Java tutorial

Introduction

Here is the source code for org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

Source

/*
 * Copyright 2012 - 2015 Manuel Laggner
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tinymediamanager.scraper.anidb;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tinymediamanager.core.Constants;
import org.tinymediamanager.scraper.IMediaArtworkProvider;
import org.tinymediamanager.scraper.ITvShowMetadataProvider;
import org.tinymediamanager.scraper.MediaArtwork;
import org.tinymediamanager.scraper.MediaArtwork.MediaArtworkType;
import org.tinymediamanager.scraper.MediaCastMember;
import org.tinymediamanager.scraper.MediaCastMember.CastType;
import org.tinymediamanager.scraper.MediaEpisode;
import org.tinymediamanager.scraper.MediaGenres;
import org.tinymediamanager.scraper.MediaMetadata;
import org.tinymediamanager.scraper.MediaProviderInfo;
import org.tinymediamanager.scraper.MediaScrapeOptions;
import org.tinymediamanager.scraper.MediaSearchOptions;
import org.tinymediamanager.scraper.MediaSearchResult;
import org.tinymediamanager.scraper.MediaType;
import org.tinymediamanager.scraper.util.CachedUrl;
import org.tinymediamanager.scraper.util.Similarity;
import org.tinymediamanager.thirdparty.RingBuffer;

/**
 * The class AniDBMetadataProvider - a metadata provider for ANIME (AniDB)
 * 
 * @author Manuel Laggner
 */
public class AniDBMetadataProvider implements ITvShowMetadataProvider, IMediaArtworkProvider {
    /** Logger shared by all instances of this provider. */
    private static final Logger LOGGER = LoggerFactory.getLogger(AniDBMetadataProvider.class);
    /** Base URL to which the picture file names delivered by AniDB are appended. */
    private static final String IMAGE_SERVER = "http://img7.anidb.net/pics/anime/";
    /** Static description of this scraper; its id doubles as the key under which AniDB ids are stored. */
    private static final MediaProviderInfo providerInfo = new MediaProviderInfo(Constants.ANIDBID, "anidb.net",
            "Scraper for anidb.net - a big anime database");
    /** Timestamps (in ms) of the most recent API calls; consulted by trackConnections() for throttling. */
    private static final RingBuffer<Long> connectionCounter = new RingBuffer<Long>(30);

    /** Lookup table title -> shows carrying that title; filled by buildTitleHashMap() on the first search. */
    private final HashMap<String, List<AniDBShow>> showsForLookup = new HashMap<String, List<AniDBShow>>();

    /**
     * Creates a new provider instance. Nothing is loaded here; the title index is built lazily on the first search.
     */
    public AniDBMetadataProvider() {
        // intentionally empty
    }

    /**
     * Returns the static descriptor of this scraper.
     *
     * @return the provider info shared by all instances of this class
     */
    @Override
    public MediaProviderInfo getProviderInfo() {
        return AniDBMetadataProvider.providerInfo;
    }

    /**
     * Scrapes the show level metadata of one anime from the AniDB HTTP API.
     * <p>
     * The AniDB id is taken from the search result attached to the options or, if that is empty, from the ids stored in
     * the options. Without an id, or when the XML document cannot be fetched or parsed, a metadata object carrying only
     * the provider id is returned.
     *
     * @param options
     *            the scrape options (language, search result and/or ids)
     * @return the scraped metadata; never {@code null}
     * @throws Exception
     *             declared by the interface; fetch/parse problems are logged and not propagated
     */
    @Override
    public MediaMetadata getTvShowMetadata(MediaScrapeOptions options) throws Exception {
        MediaMetadata md = new MediaMetadata(providerInfo.getId());
        String langu = options.getLanguage().name();

        // prefer the id of the chosen search result, fall back to the id stored in the options
        String id = options.getResult() != null ? options.getResult().getId() : "";
        if (StringUtils.isEmpty(id)) {
            id = options.getId(providerInfo.getId());
        }
        if (StringUtils.isEmpty(id)) {
            return md;
        }

        trackConnections();

        // call API http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid=4242
        String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
                + id;
        Document doc = null;
        try {
            doc = Jsoup.parse(new CachedUrl(url).getInputStream(), "UTF-8", "", Parser.xmlParser());
        } catch (Exception e) {
            LOGGER.error("failed to get TV show metadata: " + e.getMessage());

            // a broken download must not stay in the cache
            CachedUrl.removeCachedFileForUrl(url);
        }

        if (doc == null || doc.children().isEmpty()) {
            return md;
        }

        md.setId(providerInfo.getId(), id);

        // the first child is the <anime> root; dispatch on the tag name of each of its children
        for (Element node : doc.child(0).children()) {
            String tag = node.tagName();

            if ("startdate".equalsIgnoreCase(tag)) {
                md.storeMetadata(MediaMetadata.RELEASE_DATE, node.text());
                try {
                    Date date = org.tinymediamanager.scraper.util.StrgUtils.parseDate(node.text());
                    md.storeMetadata(MediaMetadata.YEAR, new SimpleDateFormat("yyyy").format(date));
                } catch (Exception ex) {
                    // best effort: without a parsable date there is simply no year
                }
            } else if ("titles".equalsIgnoreCase(tag)) {
                parseTitle(md, langu, node);
            } else if ("description".equalsIgnoreCase(tag)) {
                md.storeMetadata(MediaMetadata.PLOT, node.text());
            } else if ("ratings".equalsIgnoreCase(tag)) {
                getRating(md, node);
            } else if ("picture".equalsIgnoreCase(tag)) {
                md.storeMetadata(MediaMetadata.POSTER_URL, IMAGE_SERVER + node.text());
            } else if ("characters".equalsIgnoreCase(tag)) {
                getActors(md, node);
            }
        }

        // add static "Anime" genre
        md.addGenre(MediaGenres.ANIME);

        return md;
    }

    /**
     * Adds one cast member per child of the given {@code characters} element.
     * <p>
     * The {@code name} child supplies the character name; the {@code seiyuu} child supplies the actor name and, via its
     * {@code picture} attribute, the actor image.
     *
     * @param md
     *            the metadata to add the cast members to
     * @param e
     *            the {@code characters} element of the anime document
     */
    private void getActors(MediaMetadata md, Element e) {
        for (Element character : e.children()) {
            MediaCastMember member = new MediaCastMember(CastType.ACTOR);
            for (Element characterInfo : character.children()) {
                if ("name".equalsIgnoreCase(characterInfo.tagName())) {
                    member.setCharacter(characterInfo.text());
                } else if ("seiyuu".equalsIgnoreCase(characterInfo.tagName())) {
                    member.setName(characterInfo.text());
                    String image = characterInfo.attr("picture");
                    if (StringUtils.isNotBlank(image)) {
                        // use the shared constant instead of repeating the server URL literal
                        member.setImageUrl(IMAGE_SERVER + image);
                    }
                }
            }
            md.addCastMember(member);
        }
    }

    /**
     * Stores rating and vote count taken from the first parsable {@code temporary} child of the given element.
     *
     * @param md
     *            the metadata to store the values in
     * @param e
     *            the {@code ratings} element of the anime document
     */
    private void getRating(MediaMetadata md, Element e) {
        for (Element candidate : e.children()) {
            if (!"temporary".equalsIgnoreCase(candidate.tagName())) {
                continue;
            }

            try {
                md.storeMetadata(MediaMetadata.RATING, Float.parseFloat(candidate.text()));
                md.storeMetadata(MediaMetadata.VOTE_COUNT, Integer.parseInt(candidate.attr("count")));
                return;
            } catch (NumberFormatException ex) {
                // not parsable - keep looking at the remaining children
            }
        }
    }

    /**
     * Picks the show title out of the given {@code titles} element and stores it in the metadata.
     * <p>
     * Preference order: the title in the requested language, then the English title, then the first non blank title.
     * If several titles share a language, the last one wins.
     *
     * @param md
     *            the metadata to store the title in
     * @param langu
     *            the requested language code, compared case-insensitively against {@code xml:lang}
     * @param e
     *            the {@code titles} element of the anime document
     */
    private void parseTitle(MediaMetadata md, String langu, Element e) {
        String fallback = "";
        String english = "";
        String requested = "";

        for (Element candidate : e.children()) {
            String lang = candidate.attr("xml:lang");

            // remember the first usable title in case neither the requested nor the english one exists
            if (StringUtils.isBlank(fallback)) {
                fallback = candidate.text();
            }
            if ("en".equalsIgnoreCase(lang)) {
                english = candidate.text();
            }
            if (langu.equalsIgnoreCase(lang)) {
                requested = candidate.text();
            }
        }

        String chosen = fallback;
        if (StringUtils.isNotBlank(requested)) {
            chosen = requested;
        } else if (StringUtils.isNotBlank(english)) {
            chosen = english;
        }
        md.storeMetadata(MediaMetadata.TITLE, chosen);
    }

    /**
     * Scrapes the metadata of a single episode.
     * <p>
     * The AniDB id of the show is taken from the search result attached to the options or, if that is empty, from the
     * ids stored in the options; season and episode number are read from the ids {@code MediaMetadata.SEASON_NR} and
     * {@code MediaMetadata.EPISODE_NR}. If any of these is missing, the document cannot be loaded, or no matching
     * episode exists, a metadata object without episode details is returned.
     * <p>
     * The title is looked up in the requested language first, then in English ("en"), then under "x-jat".
     *
     * @param options
     *            the scrape options (language, ids, season/episode number)
     * @return the scraped metadata; never {@code null}
     * @throws Exception
     *             declared by the interface; fetch/parse problems are logged and not propagated
     */
    @Override
    public MediaMetadata getEpisodeMetadata(MediaScrapeOptions options) throws Exception {
        MediaMetadata md = new MediaMetadata(providerInfo.getId());

        String id = "";
        String langu = options.getLanguage().name();

        // id from result
        if (options.getResult() != null) {
            id = options.getResult().getId();
        }

        // do we have an id from the options?
        if (StringUtils.isEmpty(id)) {
            id = options.getId(providerInfo.getId());
        }

        if (StringUtils.isEmpty(id)) {
            return md;
        }

        // get episode number and season number
        int seasonNr = -1;
        int episodeNr = -1;

        try {
            seasonNr = Integer.parseInt(options.getId(MediaMetadata.SEASON_NR));
            episodeNr = Integer.parseInt(options.getId(MediaMetadata.EPISODE_NR));
        } catch (Exception e) {
            LOGGER.warn("error parsing season/episode number");
        }

        if (seasonNr == -1 || episodeNr == -1) {
            return md;
        }

        trackConnections();

        String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
                + id;
        Document doc = null;
        try {
            CachedUrl cachedUrl = new CachedUrl(url);
            doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
        } catch (Exception e) {
            // pass the exception along so the stack trace is not lost
            LOGGER.error("failed to get episode metadata: " + e.getMessage(), e);

            // clear cache
            CachedUrl.removeCachedFileForUrl(url);
        }

        if (doc == null || doc.children().size() == 0) {
            return md;
        }

        md.setId(providerInfo.getId(), id);

        List<Episode> episodes = parseEpisodes(doc);

        Episode episode = null;

        // filter out the episode
        for (Episode ep : episodes) {
            if (ep.season == seasonNr && ep.episode == episodeNr) {
                episode = ep;
                break;
            }
        }

        if (episode == null) {
            return md;
        }

        // parseEpisodes() keys the titles by lower-cased language code, so normalise the lookup key the same way
        String title = episode.titles.get(langu.toLowerCase());
        if (StringUtils.isBlank(title)) {
            title = episode.titles.get("en");
        }
        if (StringUtils.isBlank(title)) {
            title = episode.titles.get("x-jat");
        }
        md.storeMetadata(MediaMetadata.TITLE, title);
        md.storeMetadata(MediaMetadata.PLOT, episode.summary);
        md.storeMetadata(MediaMetadata.RATING, episode.rating);
        md.storeMetadata(MediaMetadata.RELEASE_DATE, episode.airdate);
        md.storeMetadata(MediaMetadata.RUNTIME, episode.runtime);
        // from here on the provider id refers to the episode, not to the show
        md.setId(providerInfo.getId(), episode.id);

        return md;
    }

    /**
     * Extracts all episodes from an anime document.
     * <p>
     * Episodes whose {@code epno} has {@code type="1"} are put into season 1; everything else that can be numbered is
     * treated as a special and put into season 0. Special episode numbers arrive with a letter prefix (e.g. "S1"),
     * which is stripped before parsing. Episodes whose number still cannot be parsed keep season and episode at -1.
     *
     * @param doc
     *            the parsed XML document; its first child must be the anime root element
     * @return the list of episodes in document order; empty if the document has no {@code episodes} element
     */
    private List<Episode> parseEpisodes(Document doc) {
        List<Episode> episodes = new ArrayList<Episode>();

        Element anime = doc.child(0);
        Element eps = null;
        // find the "episodes" child
        for (Element e : anime.children()) {
            if ("episodes".equalsIgnoreCase(e.tagName())) {
                eps = e;
                break;
            }
        }

        if (eps == null) {
            return episodes;
        }

        for (Element e : eps.children()) {
            // only <episode> children are of interest (compared case-insensitively like every other tag)
            if (!"episode".equalsIgnoreCase(e.tagName())) {
                continue;
            }

            Episode episode = new Episode();
            try {
                episode.id = Integer.parseInt(e.attr("id"));
            } catch (NumberFormatException ex) {
                // no usable id - keep the default of -1
            }

            for (Element episodeInfo : e.children()) {
                String tag = episodeInfo.tagName();

                if ("epno".equalsIgnoreCase(tag)) {
                    String epno = episodeInfo.text();
                    boolean regular = "1".equals(episodeInfo.attr("type"));

                    // specials are numbered "S1", "S2", ...; drop the prefix, otherwise parseInt rejects them
                    if (!regular && epno.length() > 1 && (epno.charAt(0) == 'S' || epno.charAt(0) == 's')) {
                        epno = epno.substring(1);
                    }

                    try {
                        episode.episode = Integer.parseInt(epno);
                        // looks like anidb is storing anything in a single season, so put 1 to season, if type = 1;
                        // everything else is seen as "specials"
                        episode.season = regular ? 1 : 0;
                    } catch (NumberFormatException ex) {
                        // other episode kinds (letter prefix other than "S") stay unnumbered
                    }
                } else if ("length".equalsIgnoreCase(tag)) {
                    try {
                        episode.runtime = Integer.parseInt(episodeInfo.text());
                    } catch (NumberFormatException ex) {
                        // keep the default runtime
                    }
                } else if ("airdate".equalsIgnoreCase(tag)) {
                    episode.airdate = episodeInfo.text();
                } else if ("rating".equalsIgnoreCase(tag)) {
                    try {
                        episode.rating = Float.parseFloat(episodeInfo.text());
                    } catch (NumberFormatException ex) {
                        // keep the default rating
                    }
                } else if ("title".equalsIgnoreCase(tag)) {
                    try {
                        // keys are lower-cased; lookups must normalise the language the same way
                        episode.titles.put(episodeInfo.attr("xml:lang").toLowerCase(), episodeInfo.text());
                    } catch (Exception ex) {
                        // best effort: skip a title that cannot be read
                    }
                } else if ("summary".equalsIgnoreCase(tag)) {
                    episode.summary = episodeInfo.text();
                }
            }
            episodes.add(episode);
        }

        return episodes;
    }

    /**
     * Searches the AniDB title index for shows matching the query.
     * <p>
     * The search string is taken from the QUERY parameter or, if that is empty, from the TITLE parameter. Every known
     * title is compared with the search string; titles scoring above 0.4 contribute their shows. Each show appears once
     * in the result, carrying the title and score of its best matching title.
     *
     * @param options
     *            the search options; the media type must be {@code MediaType.TV_SHOW}
     * @return the matching shows, best score first; empty if no search string was given
     * @throws Exception
     *             if the media type is not TV_SHOW
     */
    @Override
    public List<MediaSearchResult> search(MediaSearchOptions options) throws Exception {
        LOGGER.debug("search() " + options.toString());

        // validate the request before paying for the download of the title index
        if (options.getMediaType() != MediaType.TV_SHOW) {
            throw new Exception("wrong media type for this scraper");
        }

        List<MediaSearchResult> results = new ArrayList<MediaSearchResult>();

        // detect the string to search
        String searchString = "";
        if (StringUtils.isNotEmpty(options.get(MediaSearchOptions.SearchParam.QUERY))) {
            searchString = options.get(MediaSearchOptions.SearchParam.QUERY);
        }

        if (StringUtils.isEmpty(searchString)
                && StringUtils.isNotEmpty(options.get(MediaSearchOptions.SearchParam.TITLE))) {
            searchString = options.get(MediaSearchOptions.SearchParam.TITLE);
        }

        // return an empty search result if no query provided
        if (StringUtils.isEmpty(searchString)) {
            return results;
        }

        synchronized (AniDBMetadataProvider.class) {
            // first run: build up the anime name list
            if (showsForLookup.size() == 0) {
                buildTitleHashMap();
            }
        }

        // a show is listed under each of its titles; keep one result per show and let the best matching title win,
        // independent of the (arbitrary) iteration order of the hash map
        HashMap<Integer, MediaSearchResult> resultsById = new HashMap<Integer, MediaSearchResult>();
        HashMap<Integer, Float> bestScores = new HashMap<Integer, Float>();
        for (Entry<String, List<AniDBShow>> entry : showsForLookup.entrySet()) {
            float score = Similarity.compareStringsWithoutLog(entry.getKey(), searchString);
            if (score > 0.4) {
                for (AniDBShow show : entry.getValue()) {
                    Float best = bestScores.get(show.aniDbId);
                    if (best != null && best >= score) {
                        continue;
                    }

                    MediaSearchResult result = resultsById.get(show.aniDbId);
                    if (result == null) {
                        result = new MediaSearchResult(providerInfo.getId());
                        result.setId(String.valueOf(show.aniDbId));
                        resultsById.put(show.aniDbId, result);
                        results.add(result);
                    }
                    result.setTitle(show.title);
                    result.setScore(score);
                    bestScores.put(show.aniDbId, score);
                }
            }
        }

        // sort descending
        Collections.sort(results);
        Collections.reverse(results);

        return results;
    }

    /**
     * Loads the episode list of a show.
     * <p>
     * The AniDB id is taken from the search result attached to the options or, if that is empty, from the ids stored in
     * the options. Each episode title is looked up in the requested language first, then in English ("en"), then under
     * "x-jat".
     *
     * @param options
     *            the scrape options (language, search result and/or ids)
     * @return all episodes found in the document; empty if no id is available or the document cannot be loaded
     * @throws Exception
     *             declared by the interface; fetch/parse problems are logged and not propagated
     */
    @Override
    public List<MediaEpisode> getEpisodeList(MediaScrapeOptions options) throws Exception {
        List<MediaEpisode> episodes = new ArrayList<MediaEpisode>();

        String id = "";
        String langu = options.getLanguage().name();

        // id from result
        if (options.getResult() != null) {
            id = options.getResult().getId();
        }

        // do we have an id from the options?
        if (StringUtils.isEmpty(id)) {
            id = options.getId(providerInfo.getId());
        }

        if (StringUtils.isEmpty(id)) {
            return episodes;
        }

        trackConnections();

        String url = "http://api.anidb.net:9001/httpapi?request=anime&client=tinymediamanager&clientver=2&protover=1&aid="
                + id;
        Document doc = null;
        try {
            CachedUrl cachedUrl = new CachedUrl(url);
            doc = Jsoup.parse(cachedUrl.getInputStream(), "UTF-8", "", Parser.xmlParser());
        } catch (Exception e) {
            // pass the exception along so the stack trace is not lost
            LOGGER.error("error getting episode list: " + e.getMessage(), e);

            // clear cache
            CachedUrl.removeCachedFileForUrl(url);
        }

        if (doc == null || doc.children().size() == 0) {
            return episodes;
        }

        // parseEpisodes() keys the titles by lower-cased language code, so normalise the lookup key the same way
        String titleKey = langu.toLowerCase();

        // convert every parsed episode
        for (Episode ep : parseEpisodes(doc)) {
            MediaEpisode episode = new MediaEpisode(getProviderInfo().getId());
            episode.title = ep.titles.get(titleKey);
            episode.season = ep.season;
            episode.episode = ep.episode;
            if (StringUtils.isBlank(episode.title)) {
                episode.title = ep.titles.get("en");
            }
            if (StringUtils.isBlank(episode.title)) {
                episode.title = ep.titles.get("x-jat");
            }

            episode.plot = ep.summary;
            episode.rating = ep.rating;
            episode.firstAired = ep.airdate;
            episode.ids.put(providerInfo.getId(), ep.id);

            // without this the method would always return an empty list
            episodes.add(episode);
        }

        return episodes;
    }

    /**
     * Builds up the hashmap for a fast title search.
     * <p>
     * Downloads (through the URL cache) the AniDB title dump and registers every show under each of its titles in
     * {@code showsForLookup}. Lines starting with '#' and lines not matching the expected layout are ignored. On a
     * download problem the cached file is removed and the map stays as far as it was filled.
     */
    private void buildTitleHashMap() {
        // <aid>|<type>|<language>|<title>
        // type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per
        // language)
        Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
        Scanner scanner = null;
        String url = "http://anidb.net/api/anime-titles.dat.gz";
        try {
            CachedUrl animeList = new CachedUrl(url);
            // no GZIPInputStream here: according to the original author's note the HTTP client already decompresses
            // the .gz because of a wrong http-server configuration on the AniDB side
            scanner = new Scanner(animeList.getInputStream(), "UTF-8");
            while (scanner.hasNextLine()) {
                Matcher matcher = pattern.matcher(scanner.nextLine());
                if (!matcher.matches()) {
                    continue;
                }

                AniDBShow show = new AniDBShow();
                try {
                    show.aniDbId = Integer.parseInt(matcher.group(1));
                } catch (NumberFormatException e) {
                    // \d+ also matches numbers that do not fit into an int; skip such a line instead of aborting
                    continue;
                }
                show.language = matcher.group(3);
                show.title = matcher.group(4);

                List<AniDBShow> shows = showsForLookup.get(show.title);
                if (shows == null) {
                    shows = new ArrayList<AniDBShow>();
                    showsForLookup.put(show.title, shows);
                }
                shows.add(show);
            }
        } catch (InterruptedException e) {
            LOGGER.warn("interrupted while loading the AniDB index");

            // clear Cache
            CachedUrl.removeCachedFileForUrl(url);

            // restore the interrupt flag so callers can still see the interruption
            Thread.currentThread().interrupt();
        } catch (IOException e) {
            LOGGER.error("error getting AniDB index", e);

            // clear Cache
            CachedUrl.removeCachedFileForUrl(url);
        } finally {
            if (scanner != null) {
                scanner.close();
            }
        }
    }

    /**
     * Tracks connections and throttles if needed.
     * <p>
     * Once the ring buffer is full and its tail entry is younger than 10 seconds, the calling thread sleeps until
     * 11 seconds have passed since that entry. Afterwards the current time is recorded as a new connection.
     * If the sleep is interrupted, the interrupt flag is restored and the connection is recorded anyway.
     */
    private void trackConnections() {
        long currentTime = System.currentTimeMillis();
        if (connectionCounter.count() == connectionCounter.maxSize()) {
            Long oldestConnection = connectionCounter.getTailItem();
            if (oldestConnection != null && oldestConnection > (currentTime - 10000)) {
                LOGGER.debug("connection limit reached, throttling " + connectionCounter);
                try {
                    Thread.sleep(11000 - (currentTime - oldestConnection));
                } catch (InterruptedException e) {
                    LOGGER.warn(e.getMessage());
                    // do not swallow the interruption: restore the flag for the caller
                    Thread.currentThread().interrupt();
                }
            }
        }

        // take a fresh timestamp, the sleep above may have taken a while
        connectionCounter.add(System.currentTimeMillis());
    }

    /**
     * Returns the artwork available for a show. AniDB only offers a poster, so at most one entry is returned, and only
     * for the artwork types ALL and POSTER.
     * <p>
     * The poster URL is obtained by scraping the show metadata with the same options. If no id is available, the
     * scrape fails, or no poster URL was found, an empty list is returned.
     *
     * @param options
     *            the scrape options (artwork type, language, metadata and/or ids)
     * @return a list containing the poster, or an empty list
     * @throws Exception
     *             declared by the interface; scrape problems are logged and not propagated
     */
    @Override
    public List<MediaArtwork> getArtwork(MediaScrapeOptions options) throws Exception {
        List<MediaArtwork> artwork = new ArrayList<MediaArtwork>();
        String id = "";

        // check if there is a metadata containing an id
        if (options.getMetadata() != null) {
            // the stored id need not be a String (getEpisodeMetadata stores an int), so do not cast blindly
            Object metadataId = options.getMetadata().getId(providerInfo.getId());
            if (metadataId != null) {
                id = metadataId.toString();
            }
        }

        // get the id from the options
        if (StringUtils.isEmpty(id)) {
            id = options.getId(providerInfo.getId());
        }

        if (StringUtils.isEmpty(id)) {
            return artwork;
        }

        switch (options.getArtworkType()) {
        // AniDB only offers Poster
        case ALL:
        case POSTER:
            MediaMetadata md;
            try {
                md = getTvShowMetadata(options);
            } catch (Exception e) {
                LOGGER.warn("could not scrape metadata for artwork: " + e.getMessage());
                return artwork;
            }

            // getTvShowMetadata() returns metadata without a poster when it cannot resolve or load the show;
            // an artwork entry without an URL is useless to the caller
            String posterUrl = md.getStringValue(MediaMetadata.POSTER_URL);
            if (StringUtils.isBlank(posterUrl)) {
                return artwork;
            }

            MediaArtwork ma = new MediaArtwork();
            ma.setDefaultUrl(posterUrl);
            ma.setPreviewUrl(posterUrl);
            ma.setLanguage(options.getLanguage().name());
            ma.setType(MediaArtworkType.POSTER);
            artwork.add(ma);

            break;

        default:
            return artwork;
        }

        return artwork;
    }

    /**
     * Helper class for episode extraction: a plain holder for the values read from one {@code episode} element.
     * Numeric identifiers default to -1 ("unknown"), runtime and rating to 0, texts to the empty string.
     */
    private static class Episode {
        /** titles of the episode, keyed by language code */
        HashMap<String, String> titles = new HashMap<String, String>();
        String summary = "";
        String airdate = "";
        float rating = 0f;
        int runtime = 0;
        int season = -1;
        int episode = -1;
        int id = -1;
    }
}