List of usage examples for org.jsoup.nodes.Document#getElementsByAttributeValueMatching
public Elements getElementsByAttributeValueMatching(String key, Pattern pattern)
From source file:faescapeplan.FAEscapePlanUI.java
/**
 * Collects the journal ids listed on the user's journal overview page.
 *
 * <p>The page {@code http://www.furaffinity.net/journals/<user name>/} is fetched with the stored cookies and
 * {@code USER_AGENT}. Every element whose {@code id} attribute matches {@code jid:\d+} contributes that attribute
 * value with the literal text {@code jid:} stripped.
 *
 * <p>An {@link IOException} is logged at {@code SEVERE} level and reported to the user in a message dialog; the
 * ids gathered up to that point are still returned.
 *
 * @return the collected journal ids, possibly empty, never {@code null}
 */
private ArrayList<String> indexJournals() {
    final ArrayList<String> journalIds = new ArrayList<>();
    updateTextLog("Indexing journal entries...");

    try {
        final String journalsUrl = "http://www.furaffinity.net/journals/" + userData.getName() + "/";
        final Document listing = Jsoup.connect(journalsUrl)
                .cookies(userData.getCookies())
                .userAgent(USER_AGENT)
                .get();

        for (Element entry : listing.getElementsByAttributeValueMatching("id", "jid:\\d+")) {
            journalIds.add(entry.attr("id").replace("jid:", ""));
        }
    } catch (IOException ex) {
        Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
        JOptionPane.showMessageDialog(this, "An IOException occurred while indexing journals");
    }

    return journalIds;
}
From source file:faescapeplan.FAEscapePlanUI.java
private ArrayList<String> indexSection(String section) { ArrayList<String> idList = new ArrayList<>(); boolean itemsRemain = true; int pageCount = 1; updateTextLog("Indexing " + section + "..."); while (itemsRemain) { try {/*from w ww . ja v a2 s .c o m*/ Document currentPage = Jsoup .connect("http://www.furaffinity.net/" + section + "/" + userData.getName() + "/" + pageCount + "/") // TEST .timeout(10000).userAgent(USER_AGENT).cookies(userData.getCookies()).get(); if (currentPage.getElementById("no-images") == null) { updateTextLog("Indexing page " + pageCount); Elements elementList = currentPage.getElementsByAttributeValueMatching("id", "sid_\\d+"); for (Element item : elementList) { String cleanId = item.attr("id").replace("sid_", ""); idList.add(cleanId); } pageCount++; } else { itemsRemain = false; updateTextLog("Finished indexing " + section); } } catch (HttpStatusException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Could not connect to FA"); // DEBUG break; } catch (SocketTimeoutException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Connection timed out"); // DEBUG break; } catch (IOException ex) { Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex); System.out.println("An IO Exception occurred while indexing " + section); // DEBUG break; } } return idList; }
From source file:org.tinymediamanager.scraper.aebn.AebnMetadataProvider.java
/** * Search for movies at aebn.net./*from ww w . java 2s .co m*/ * */ @Override public List<MediaSearchResult> search(MediaSearchOptions query) throws Exception { LOGGER.debug("AEBN: search() {}", query); List<MediaSearchResult> resultList = new ArrayList<MediaSearchResult>(); Elements movies = null; String searchString = ""; // Search for query if (StringUtils.isNotEmpty(query.get(MediaSearchOptions.SearchParam.QUERY))) { searchString = query.get(MediaSearchOptions.SearchParam.QUERY); } // Search String searchUrl = BASE_DATAURL + "/dispatcher/fts?userQuery=" + URLEncoder.encode(cleanSearchQuery(searchString), "UTF-8") + "&targetSearchMode=basic&isAdvancedSearch=true&isFlushAdvancedSearchCriteria=false" + "&count=" + SEARCH_COUNT.toString() + "&imageType=Large&sortType=Relevance"; try { LOGGER.info("========= BEGIN AEBN Scraper Search for: {}", searchString); Url url = new Url(searchUrl); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // only look for movie links like // <a id="FTSMovieSearch_link_title_detail_30" ... 
</a> movies = doc.getElementsByAttributeValueMatching("id", "FTSMovieSearch_link_title_detail_\\d+"); LOGGER.debug("AEBN: found {} search results", movies.size()); } catch (Exception e) { LOGGER.error("AEBN: failed to search for {}: ", searchString, e); } if (movies == null || movies.isEmpty()) { LOGGER.debug("AEBN: no movie found"); return resultList; } // there are search results, so fill media data structure HashSet<String> foundResultUrls = new HashSet<String>(); for (Element anchor : movies) { try { String movieUrl = BASE_DATAURL + StrgUtils.substr(anchor.toString(), "href=\\\"(.*?)\\\""); String movieId = StrgUtils.substr(anchor.toString(), "movieId=(\\d+)"); String movieName = StringEscapeUtils.unescapeHtml4(anchor.text()); String posterUrl = BASE_IMGURL + "/Stream/Movie/Boxcovers/a" + movieId + "_160w.jpg"; LOGGER.debug("AEBN: found movie {} (id{})", movieName, movieId); // check if it is a valid AEBN id if (!isValidAebnId(Integer.parseInt(movieId))) { LOGGER.error("AEBN: id({}) is not a valid aebn id", movieId); } MediaSearchResult sr = new MediaSearchResult(providerInfo.getId()); sr.setId(movieId); sr.setIMDBId(""); sr.setTitle(movieName); sr.setOriginalTitle(movieName); // sr.setYear not possible, no data at this point sr.setYear(null); sr.setMediaType(MediaType.MOVIE); sr.setUrl(movieUrl); sr.setPosterUrl(posterUrl); // compare score based on names float score = MetadataUtil.calculateScore(searchString, movieName); if (posterUrl.isEmpty() || posterUrl.contains("nopicture")) { LOGGER.debug("AEBN: no poster - downgrading score by 0.01"); score = score - 0.01f; } sr.setScore(score); // check if result has at least a title and id if (StringUtils.isBlank(sr.getTitle()) || StringUtils.isBlank(sr.getId())) { LOGGER.warn("AEBN: no title nor id, skipping"); continue; } // check if the movie has been already added to the search results if (foundResultUrls.contains(sr.getUrl())) { continue; } foundResultUrls.add(sr.getUrl()); // populate extra arguments 
(deprecated) // MetadataUtil.copySearchQueryToSearchResult(query, sr); resultList.add(sr); } catch (Exception e) { LOGGER.warn("AEBN: error parsing search result: {}", e); } } Collections.sort(resultList); Collections.reverse(resultList); return resultList; }
From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java
@Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("getMetadata() " + options.toString()); if (options.getType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getType()); }/*from w ww .j a va 2s. com*/ // we have 3 entry points here // a) getMetadata has been called with an ofdbId // b) getMetadata has been called with an imdbId // c) getMetadata has been called from a previous search String detailUrl = ""; // case a) and c) if (StringUtils.isNotBlank(options.getId(getProviderInfo().getId())) || options.getResult() != null) { if (StringUtils.isNotBlank(options.getId(getProviderInfo().getId()))) { detailUrl = "http://www.ofdb.de/view.php?page=film&fid=" + options.getId(getProviderInfo().getId()); } else { detailUrl = options.getResult().getUrl(); } } // case b) if (options.getResult() == null && StringUtils.isNotBlank(options.getId(MediaMetadata.IMDB))) { MediaSearchOptions searchOptions = new MediaSearchOptions(MediaType.MOVIE); searchOptions.setImdbId(options.getId(MediaMetadata.IMDB)); try { List<MediaSearchResult> results = search(searchOptions); if (results != null && !results.isEmpty()) { options.setResult(results.get(0)); detailUrl = options.getResult().getUrl(); } } catch (Exception e) { LOGGER.warn("failed IMDB search: " + e.getMessage()); } } // we can only work further if we got a search result on ofdb.de if (StringUtils.isBlank(detailUrl)) { throw new Exception("We did not get any useful movie url"); } MediaMetadata md = new MediaMetadata(providerInfo.getId()); // generic Elements used all over Elements el = null; String ofdbId = StrgUtils.substr(detailUrl, "film\\/(\\d+),"); if (StringUtils.isBlank(ofdbId)) { ofdbId = StrgUtils.substr(detailUrl, "fid=(\\d+)"); } Url url; try { LOGGER.trace("get details page"); url = new Url(detailUrl); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); if (doc.getAllElements().size() < 10) { throw new 
Exception("meh - we did not receive a valid web page"); } // parse details // IMDB ID "http://www.imdb.com/Title?1194173" el = doc.getElementsByAttributeValueContaining("href", "imdb.com"); if (!el.isEmpty()) { md.setId(MediaMetadata.IMDB, "tt" + StrgUtils.substr(el.first().attr("href"), "\\?(\\d+)")); } // title / year // <meta property="og:title" content="Bourne Vermchtnis, Das (2012)" /> el = doc.getElementsByAttributeValue("property", "og:title"); if (!el.isEmpty()) { String[] ty = parseTitle(el.first().attr("content")); md.setTitle(StrgUtils.removeCommonSortableName(ty[0])); try { md.setYear(Integer.parseInt(ty[1])); } catch (Exception ignored) { } } // another year position if (md.getYear() == 0) { // <a href="view.php?page=blaettern&Kat=Jahr&Text=2012">2012</a> el = doc.getElementsByAttributeValueContaining("href", "Kat=Jahr"); try { md.setYear(Integer.parseInt(el.first().text())); } catch (Exception ignored) { } } // original title (has to be searched with a regexp) // <tr valign="top"> // <td nowrap=""><font class="Normal" face="Arial,Helvetica,sans-serif" // size="2">Originaltitel:</font></td> // <td> </td> // <td width="99%"><font class="Daten" face="Arial,Helvetica,sans-serif" // size="2"><b>Brave</b></font></td> // </tr> String originalTitle = StrgUtils.substr(doc.body().html(), "(?s)Originaltitel.*?<b>(.*?)</b>"); if (!originalTitle.isEmpty()) { md.setOriginalTitle(StrgUtils.removeCommonSortableName(originalTitle)); } // Genre: <a href="view.php?page=genre&Genre=Action">Action</a> el = doc.getElementsByAttributeValueContaining("href", "page=genre"); for (Element g : el) { md.addGenre(getTmmGenre(g.text())); } // rating // <div itemtype="http://schema.org/AggregateRating" itemscope // itemprop="aggregateRating">Note: <span // itemprop="ratingValue">6.73</span><meta // itemprop="worstRating" content="1" /> el = doc.getElementsByAttributeValue("itemprop", "ratingValue"); if (!el.isEmpty()) { String r = el.text(); if (!r.isEmpty()) { try { 
md.setRating(Float.parseFloat(r)); } catch (Exception e) { LOGGER.debug("could not parse rating"); } } } // get PlotLink; open url and parse // <a href="plot/22523,31360,Die-Bourne-Identitt"><b>[mehr]</b></a> LOGGER.trace("parse plot"); el = doc.getElementsByAttributeValueMatching("href", "plot\\/\\d+,"); if (!el.isEmpty()) { String plotUrl = BASE_URL + "/" + el.first().attr("href"); try { url = new Url(plotUrl); in = url.getInputStream(); Document plot = Jsoup.parse(in, "UTF-8", ""); in.close(); Elements block = plot.getElementsByClass("Blocksatz"); // first // Blocksatz // is plot String p = block.first().text(); // remove all html stuff p = p.substring(p.indexOf("Mal gelesen") + 12); // remove "header" md.setPlot(p); } catch (Exception e) { LOGGER.error("failed to get plot page: " + e.getMessage()); } } // http://www.ofdb.de/view.php?page=film_detail&fid=226745 LOGGER.debug("parse actor detail"); String movieDetail = BASE_URL + "/view.php?page=film_detail&fid=" + ofdbId; doc = null; try { url = new Url(movieDetail); in = url.getInputStream(); doc = Jsoup.parse(in, "UTF-8", ""); in.close(); } catch (Exception e) { LOGGER.error("failed to get detail page: " + e.getMessage()); } if (doc != null) { parseCast(doc.getElementsContainingOwnText("Regie"), MediaCastMember.CastType.DIRECTOR, md); parseCast(doc.getElementsContainingOwnText("Darsteller"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Stimme/Sprecher"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Synchronstimme (deutsch)"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Drehbuchautor(in)"), MediaCastMember.CastType.WRITER, md); parseCast(doc.getElementsContainingOwnText("Produzent(in)"), MediaCastMember.CastType.PRODUCER, md); } } catch (Exception e) { LOGGER.error("Error parsing " + detailUrl); throw e; } return md; }
From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java
@Override public List<MediaSearchResult> search(MediaSearchOptions options) throws Exception { LOGGER.debug("search() " + options.toString()); if (options.getMediaType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getMediaType()); }//from w ww.j av a 2 s . c o m List<MediaSearchResult> resultList = new ArrayList<>(); String searchString = ""; String searchQuery = ""; String imdb = ""; Elements filme = null; int myear = options.getYear(); /* * Kat = All | Titel | Person | DTitel | OTitel | Regie | Darsteller | Song | Rolle | EAN| IMDb | Google * http://www.ofdb.de//view.php?page=suchergebnis &Kat=xxxxxxxxx&SText=yyyyyyyyyyy */ // 1. search with imdbId if (StringUtils.isNotEmpty(options.getImdbId()) && (filme == null || filme.isEmpty())) { try { imdb = options.getImdbId(); searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + imdb; LOGGER.debug("search with imdbId: " + imdb); Url url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // only look for movie links filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); LOGGER.debug("found " + filme.size() + " search results"); } catch (Exception e) { LOGGER.error("failed to search for imdb Id " + imdb + ": " + e.getMessage()); } } // 2. 
search for search string if (StringUtils.isNotEmpty(options.getQuery()) && (filme == null || filme.isEmpty())) { try { String query = options.getQuery(); searchQuery = query; query = MetadataUtil.removeNonSearchCharacters(query); searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=All&SText=" + URLEncoder.encode(cleanSearch(query), "UTF-8"); LOGGER.debug("search for everything: " + query); Url url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // only look for movie links filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); LOGGER.debug("found " + filme.size() + " search results"); } catch (Exception e) { LOGGER.error("failed to search for " + searchQuery + ": " + e.getMessage()); } } if (filme == null || filme.isEmpty()) { LOGGER.debug("nothing found :("); return resultList; } // <a href="film/22523,Die-Bourne-Identitt" // onmouseover="Tip('<img src="images/film/22/22523.jpg" // width="120" height="170">',SHADOW,true)">Bourne // Identitt, Die<font size="1"> / Bourne Identity, The</font> (2002)</a> HashSet<String> foundResultUrls = new HashSet<>(); for (Element a : filme) { try { MediaSearchResult sr = new MediaSearchResult(providerInfo.getId(), MediaType.MOVIE); if (StringUtils.isNotEmpty(imdb)) { sr.setIMDBId(imdb); } sr.setId(StrgUtils.substr(a.toString(), "film\\/(\\d+),")); // OFDB ID sr.setTitle(StringEscapeUtils.unescapeHtml4(StrgUtils .removeCommonSortableName(StrgUtils.substr(a.toString(), ".*>(.*?)(\\[.*?\\])?<font")))); LOGGER.debug("found movie " + sr.getTitle()); sr.setOriginalTitle(StringEscapeUtils.unescapeHtml4( StrgUtils.removeCommonSortableName(StrgUtils.substr(a.toString(), ".*> / (.*?)</font")))); try { sr.setYear(Integer.parseInt(StrgUtils.substr(a.toString(), "font> \\((.*?)\\)<\\/a"))); } catch (Exception ignored) { } sr.setUrl(BASE_URL + "/" + StrgUtils.substr(a.toString(), "href=\\\"(.*?)\\\"")); sr.setPosterUrl(BASE_URL + "/images" + 
StrgUtils.substr(a.toString(), "images(.*?)\\"")); // check if it has at least a title and url if (StringUtils.isBlank(sr.getTitle()) || StringUtils.isBlank(sr.getUrl())) { continue; } // OFDB could provide linke twice - check if that has been already added if (foundResultUrls.contains(sr.getUrl())) { continue; } foundResultUrls.add(sr.getUrl()); if (imdb.equals(sr.getIMDBId())) { // perfect match sr.setScore(1); } else { // compare score based on names float score = MetadataUtil.calculateScore(searchQuery, sr.getTitle()); if (yearDiffers(myear, sr.getYear())) { float diff = (float) Math.abs(myear - sr.getYear()) / 100; LOGGER.debug( "parsed year does not match search result year - downgrading score by " + diff); score -= diff; } sr.setScore(score); } resultList.add(sr); } catch (Exception e) { LOGGER.warn("error parsing movie result: " + e.getMessage()); } } Collections.sort(resultList); Collections.reverse(resultList); return resultList; }
From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java
@Override public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception { LOGGER.debug("getTrailers() " + options.toString()); List<MediaTrailer> trailers = new ArrayList<>(); if (!MetadataUtil.isValidImdbId(options.getImdbId())) { LOGGER.debug("IMDB id not found"); return trailers; }//from w w w . j ava 2s.c om /* * function getTrailerData(ci) { switch (ci) { case 'http://de.clip-1.filmtrailer.com/9507_31566_a_1.flv?log_var=72|491100001 -1|-' : return * '<b>Trailer 1</b><br><i>(small)</i><br><br>» 160px<br><br>Download:<br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_1.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_2.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(medium)</i><br><br>» * 240px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_2.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_3.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(large)</i><br><br>» * 320px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_3.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_3.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_3.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_4.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(xlarge)</i><br><br>» * 400px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_4.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_4.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_4.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_5.flv?log_var=72|491100001 -1|-' : return 
'<b>Trailer 1</b><br><i>(xxlarge)</i><br><br>» * 640px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_1.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(small)</i><br><br>» * 160px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_1.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_2.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(medium)</i><br><br>» * 240px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_2.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_3.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(large)</i><br><br>» * 320px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_3.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_3.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_3.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_4.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(xlarge)</i><br><br>» * 400px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_4.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_4.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_4.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_5.flv?log_var=72|491100001 -1|-' : 
return '<b>Trailer 2</b><br><i>(xxlarge)</i><br><br>» * 640px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>'; } } */ Url url = null; String searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + options.getImdbId(); try { // search with IMDB url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); Elements filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); if (filme == null || filme.isEmpty()) { LOGGER.debug("found no search results"); return trailers; } LOGGER.debug("found " + filme.size() + " search results"); // hopefully // only one LOGGER.debug("get (trailer) details page"); url = new Url(BASE_URL + "/" + StrgUtils.substr(filme.first().toString(), "href=\\\"(.*?)\\\"")); in = url.getInputStream(); doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // OLD STYLE // <b>Trailer 1</b><br><i>(xxlarge)</i><br><br>» 640px<br><br>Download:<br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br> Pattern regex = Pattern.compile("return '(.*?)';"); Matcher m = regex.matcher(doc.toString()); while (m.find()) { String s = m.group(1); String tname = StrgUtils.substr(s, "<b>(.*?)</b>"); String tpix = StrgUtils.substr(s, "raquo; (.*?)x<br>"); // String tqual = StrgUtils.substr(s, "<i>\\((.*?)\\)</i>"); // url + format Pattern lr = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>"); Matcher lm = lr.matcher(s); while (lm.find()) { String 
turl = lm.group(1); // String tformat = lm.group(2); MediaTrailer trailer = new MediaTrailer(); trailer.setName(tname); // trailer.setQuality(tpix + " (" + tformat + ")"); trailer.setQuality(tpix); trailer.setProvider("filmtrailer"); trailer.setUrl(turl); LOGGER.debug(trailer.toString()); trailers.add(trailer); } } // NEW STYLE (additional!) // <div class="clips" id="clips2" style="display: none;"> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 1:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 2:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" 
vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 3:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <br> // </div> // new style size // 1 = 160 x 90 = small // 2 = 240 x 136 = medium // 3 = 320 x 180 = large // 4 = 400 x 226 = xlarge // 5 = 640 x 360 = xxlarge ; regex = Pattern.compile("<i>(.*?)</i>(.*?)<br>", Pattern.DOTALL); // get them as single trailer line m = regex.matcher(doc.getElementsByClass("clips").html()); while (m.find()) { // LOGGER.info(doc.getElementsByClass("clips").html()); // parse each line with 5 qualities String tname = m.group(1).trim(); tname = tname.replaceFirst(":$", ""); // replace ending colon String urls = m.group(2); // url + format Pattern lr = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>"); Matcher lm = lr.matcher(urls); while (lm.find()) { String turl = lm.group(1); String tpix = ""; String tformat = lm.group(2).replaceAll(" ", "").trim(); switch (tformat) { case "small": tpix = "90p"; break; case "medium": tpix = "136p"; break; case "large": tpix = "180p"; break; case "xlarge": tpix = "226p"; break; case "xxlarge": tpix = "360p"; break; default: break; } MediaTrailer trailer = new MediaTrailer(); trailer.setName(tname); // trailer.setQuality(tpix + " (" + tformat + ")"); trailer.setQuality(tpix); trailer.setProvider("filmtrailer"); trailer.setUrl(turl); LOGGER.debug(trailer.toString()); trailers.add(trailer); } } } catch (Exception e) { if (url != null) { LOGGER.error("Error parsing {}", url.toString()); } else { 
LOGGER.error("Error parsing {}", searchString); } throw e; } return trailers; }