Example usage for org.jsoup.select Elements first

List of usage examples for org.jsoup.select.Elements.first()

Introduction

On this page you can find example usages of org.jsoup.select.Elements.first().

Prototype

public Element first() 

Source Link

Document

Get the first matched element.

Usage

From source file:org.bonitasoft.web.designer.visitors.HtmlBuilderVisitorTest.java

/**
 * Checks that each row added to a container is rendered as its own {@code .row} element
 * holding the tag of the component placed in that row.
 */
@Test
public void should_add_elements_to_the_container_rows() throws Exception {

    // Render a container made of two rows (one component each) and collect the rendered rows.
    Elements rows = toBody(visitor.visit(
            aContainer()
                    .with(aRow().with(aComponent().withWidgetId("pbLabel").build()),
                            aRow().with(aComponent().withWidgetId("customLabel").build()))
                    .build()))
            .select(".row");

    assertThat(rows.size()).isEqualTo(2);

    // The first row holds the pbLabel widget...
    String firstRowComponent = rows.first().select("pb-label").outerHtml();
    assertThat(firstRowComponent).isEqualTo("<pb-label></pb-label>");

    // ...and the last row holds the customLabel widget.
    String lastRowComponent = rows.last().select("custom-label").outerHtml();
    assertThat(lastRowComponent).isEqualTo("<custom-label></custom-label>");
}

From source file:org.coronastreet.gpxconverter.StravaForm.java

/**
 * Logs in and uploads the converted activity as {@code temp.tcx} through the upload form.
 * <p>
 * A fresh HTTP client, context and cookie store are created on every call. After a successful
 * login the upload form is fetched to read the CSRF parameter name and token from its
 * {@code <meta>} tags, then the activity is posted as a non-chunked multipart request.
 * The outcome is reported through {@code log}; exceptions raised while uploading are logged,
 * not propagated. The HTTP client is closed on every exit path.
 */
public void upload() {
    httpClient = HttpClientBuilder.create().build();
    localContext = new BasicHttpContext();
    cookieStore = new BasicCookieStore();
    localContext.setAttribute(HttpClientContext.COOKIE_STORE, cookieStore);

    try {
        if (!doLogin()) {
            log("Failed to upload!");
            return;
        }

        try {
            // Have to fetch the form to get the CSRF Token
            HttpGet get = new HttpGet(uploadFormURL);
            HttpResponse formResponse = httpClient.execute(get, localContext);
            org.jsoup.nodes.Document doc = Jsoup.parse(EntityUtils.toString(formResponse.getEntity()));

            String csrfparam = null;
            Elements metalinksParam = doc.select("meta[name=csrf-param]");
            if (!metalinksParam.isEmpty()) {
                csrfparam = metalinksParam.first().attr("content");
            } else {
                log("Missing csrf-param?");
            }

            String csrftoken = null;
            Elements metalinksToken = doc.select("meta[name=csrf-token]");
            if (!metalinksToken.isEmpty()) {
                csrftoken = metalinksToken.first().attr("content");
            } else {
                log("Missing csrf-token?");
            }

            // Without both values the multipart form below cannot be built; stop here with a
            // clear message instead of failing later with an unrelated exception.
            if (csrfparam == null || csrftoken == null) {
                log("Cannot upload without CSRF parameter and token");
                return;
            }

            HttpPost request = new HttpPost(uploadURL);
            request.setHeader("X-CSRF-Token", csrftoken);

            MultipartEntity entity = new MultipartEntity(HttpMultipartMode.BROWSER_COMPATIBLE);
            entity.addPart("method", new StringBody("post"));
            entity.addPart("new_uploader", new StringBody("1"));
            entity.addPart(csrfparam, new StringBody(csrftoken));
            entity.addPart("files[]",
                    new InputStreamBody(document2InputStream(outDoc), "application/octet-stream", "temp.tcx"));

            // Need to do this bit because without it you can't disable chunked encoding, and Strava doesn't support chunked.
            ByteArrayOutputStream bArrOS = new ByteArrayOutputStream();
            entity.writeTo(bArrOS);
            bArrOS.flush();
            ByteArrayEntity bArrEntity = new ByteArrayEntity(bArrOS.toByteArray());
            bArrOS.close();

            bArrEntity.setChunked(false);
            bArrEntity.setContentEncoding(entity.getContentEncoding());
            bArrEntity.setContentType(entity.getContentType());

            request.setEntity(bArrEntity);

            HttpResponse response = httpClient.execute(request, localContext);
            HttpEntity responseEntity = response.getEntity();

            if (response.getStatusLine().getStatusCode() != 200) {
                log("Failed to Upload");
                if (responseEntity != null) {
                    log(EntityUtils.toString(responseEntity));
                }
            } else if (responseEntity != null) {
                // The answer is a JSON array; its first object describes the upload.
                String output = EntityUtils.toString(responseEntity);
                JSONObject userInfo = new JSONArray(output).getJSONObject(0);

                if (userInfo.get("workflow").equals("Error")) {
                    log("Upload Error: " + userInfo.get("error"));
                } else {
                    log("Successful Uploaded. ID is " + userInfo.get("id"));
                }
            }
        } catch (Exception ex) {
            log("Exception? " + ex.getMessage());
            ex.printStackTrace();
        }
    } finally {
        // Release the client on every path: success, failed login, early return or exception.
        try {
            httpClient.close();
        } catch (Exception closeEx) {
            log("Exception? " + closeEx.getMessage());
        }
    }
}

From source file:org.coronastreet.gpxconverter.StravaForm.java

/**
 * Authenticates with the configured email and password.
 * <p>
 * The login page is fetched first to read the CSRF parameter name and token from its
 * {@code <meta>} tags; both are then posted together with the credentials to the session URL.
 *
 * @return {@code true} when the session request answers with HTTP status 302;
 *         {@code false} for any other status, when the CSRF parameter name is missing,
 *         or when the exchange fails with an exception before that status is read
 */
protected boolean doLogin() {
    boolean ret = false;
    log("Authenticating athlete...");
    try {
        HttpGet get = new HttpGet(loginURL);
        HttpResponse response = httpClient.execute(get, localContext);
        org.jsoup.nodes.Document doc = Jsoup.parse(EntityUtils.toString(response.getEntity()));

        String csrfparam = null;
        Elements metalinksParam = doc.select("meta[name=csrf-param]");
        if (!metalinksParam.isEmpty()) {
            csrfparam = metalinksParam.first().attr("content");
            log("Setting csrf-param to " + csrfparam);
        } else {
            log("Missing csrf-param?");
        }

        String csrftoken = null;
        Elements metalinksToken = doc.select("meta[name=csrf-token]");
        if (!metalinksToken.isEmpty()) {
            csrftoken = metalinksToken.first().attr("content");
            log("Setting csrf-token to " + csrftoken);
        } else {
            log("Missing csrf-token?");
        }

        // A form field needs a name: without the CSRF parameter name the login form
        // cannot be built, so give up explicitly rather than through an exception.
        if (csrfparam == null) {
            log("Failed to Login. No csrf-param found on the login page");
            return false;
        }

        HttpPost post = new HttpPost(sessionURL);
        post.setHeader("Referer", "https://www.strava.com/login");
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair(csrfparam, csrftoken));
        nvps.add(new BasicNameValuePair("plan", ""));
        nvps.add(new BasicNameValuePair("email", email));
        nvps.add(new BasicNameValuePair("password", password));

        post.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

        HttpResponse sessionResponse = httpClient.execute(post, localContext);
        int status = sessionResponse.getStatusLine().getStatusCode();
        HttpEntity entity = sessionResponse.getEntity();

        if (status != 302) {
            log("Failed to Login. " + status);
            // The response may carry no body at all.
            if (entity != null) {
                log(EntityUtils.toString(entity));
            }
        } else {
            ret = true;
        }
        EntityUtils.consume(entity);

    } catch (Exception ex) {
        // Report the failure the same way upload() does, then fall through with the current result.
        log("Exception? " + ex.getMessage());
        ex.printStackTrace();
    }
    return ret;
}

From source file:org.opens.tanaguru.crawler.CrawlerImpl.java

/**
 * Parses the HTML source of the given content to detect a rel=canonical link
 * (stopgap while waiting for a better implementation).
 *
 * @param content the content whose source is inspected
 * @return {@code true} when the source defines exactly one rel canonical link whose href,
 * once everything up to the first "//" and one trailing "/" have been stripped, differs
 * from the url of the content stripped the same way; {@code false} otherwise, including
 * when the feature is disabled, the content is not an SSP or its source is blank.
 */
private boolean isRelCanonicalPage(Content content) {
    // @TODO make this implementation cleaner
    if (!treatRelCanonical || !(content instanceof SSP)) {
        return false;
    }
    SSP ssp = (SSP) content;
    if (StringUtils.isBlank(ssp.getSource())) {
        return false;
    }

    Elements canonicalLinks = Jsoup.parse(ssp.getSource()).select(REL_CANONICAL_CSS_LIKE_QUERY);
    // Only a single, unambiguous rel canonical definition is taken into account
    if (canonicalLinks.size() != 1) {
        return false;
    }

    String canonicalUrl = canonicalLinks.first().attr("href");
    if (".".equals(canonicalUrl)) {
        return false;
    }
    // Canonical url: drop the scheme part first, then one trailing slash
    int canonicalSchemeEnd = canonicalUrl.indexOf("//");
    if (canonicalSchemeEnd >= 0) {
        canonicalUrl = canonicalUrl.substring(canonicalSchemeEnd + 2);
    }
    if (canonicalUrl.endsWith("/")) {
        canonicalUrl = canonicalUrl.substring(0, canonicalUrl.length() - 1);
    }

    // Page url: drop one trailing slash first, then the scheme part
    String pageUrl = content.getURI();
    if (pageUrl.endsWith("/")) {
        pageUrl = pageUrl.substring(0, pageUrl.length() - 1);
    }
    int pageSchemeEnd = pageUrl.indexOf("//");
    if (pageSchemeEnd >= 0) {
        pageUrl = pageUrl.substring(pageSchemeEnd + 2);
    }

    if (!pageUrl.equals(canonicalUrl)) {
        return true;
    }
    LOGGER.info("rel canonical present but points to itself " + content.getURI());
    return false;
}

From source file:org.sbs.goodcrawler.extractor.selector.AbstractElementCssSelector.java

/**
 * ????/*from  ww w.ja  va2 s.co  m*/
 * @param elements
 * @return
 */
protected String getExtractText(Elements elements) {
    if (elements.size() == 0)
        return "";
    String temp = "";

    if (attr.equals("tostring")) {
        if (index == 0 || index > elements.size())
            temp = elements.first().toString();
        else
            temp = elements.get(index).toString();
    } else {
        if (index == 0 || index > elements.size())
            temp = elements.first().text();
        else
            temp = elements.get(index).text();
    }

    if (null != pattern) {
        Matcher m = pattern.matcher(temp);
        if (m.find()) {
            temp = m.group(1);
        }
    }
    return temp;
}

From source file:org.sbs.goodcrawler.extractor.selector.AbstractElementCssSelector.java

/**
 * Extracts an attribute value from the selected elements.
 * <p>
 * The element at position {@code index} is used; when {@code index} does not designate an
 * element of the selection, the first element is used instead. The value is the element's
 * markup when {@code attr} equals "tostring", otherwise the value of the attribute named
 * {@code attr}. When a {@code pattern} is set and finds a match, the value is replaced by
 * capture group 1 of that match.
 *
 * @param elements the selected elements
 * @param attr the attribute to read, or "tostring" for the element's markup
 * @return the extracted string, or an empty string when nothing was selected
 */
protected String getExtractAttr(Elements elements, String attr) {
    // An empty selection has no first element to read from.
    if (elements == null || elements.isEmpty()) {
        return "";
    }

    // Valid positions are 0 .. size-1: index == size is already out of range, as is a
    // negative index. Both fall back to the first element.
    int position = (index < 0 || index >= elements.size()) ? 0 : index;

    String temp;
    if (attr.equals("tostring")) {
        temp = elements.get(position).toString();
    } else {
        temp = elements.get(position).attr(attr);
    }

    if (null != pattern) {
        Matcher m = pattern.matcher(temp);
        if (m.find()) {
            temp = m.group(1);
        }
    }
    return temp;
}

From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java

/**
 * Rewrites a WYSIWYG HTML fragment and writes the result to the response.
 * <p>
 * The fragment is wrapped in {@code <html><body>} and parsed. Images whose source contains
 * "/silverpeas" get their source replaced by the result of
 * {@code convertSpImageUrlToDataUrl}. Embeds whose markup contains "flash" and references an
 * attachment id are replaced by an {@code <audio>} or {@code <video>} element pointing to the
 * mobile Attachment service when the attachment is of a supported audio or video type.
 * Embeds that cannot be resolved are left untouched.
 *
 * @param html the WYSIWYG fragment
 * @param request the current request, used to resolve the user in session
 * @param response the response the resulting document is written to
 * @param instanceId the component instance id added to the attachment urls
 * @throws IOException if the response cannot be written
 */
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response,
        String instanceId) throws IOException {
    Document doc = Jsoup.parse("<html><body>" + html + "</body></html>");

    for (Element img : doc.getElementsByTag("img")) {
        String source = img.attr("src");
        if (source.contains("/silverpeas")) {
            // need to convert in dataurl
            img.attr("src", convertSpImageUrlToDataUrl(source));
        }
    }

    final String idMarker = "attachmentId/";
    for (Element embed : doc.getElementsByTag("embed")) {
        String htmlPart = embed.outerHtml();
        if (!htmlPart.contains("flash")) {
            continue;
        }

        // The attachment id sits between "attachmentId/" and the next "/"; skip the embed
        // when either delimiter is missing instead of failing on substring().
        int markerPos = htmlPart.indexOf(idMarker);
        if (markerPos < 0) {
            continue;
        }
        int idStart = markerPos + idMarker.length();
        int idEnd = htmlPart.indexOf("/", idStart);
        if (idEnd < 0) {
            continue;
        }
        String attachmentId = htmlPart.substring(idStart, idEnd);

        String language = getUserInSession(request).getUserPreferences().getLanguage();
        SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService()
                .searchDocumentById(new SimpleDocumentPK(attachmentId), language);
        if (attachment == null) {
            continue;
        }
        String type = attachment.getContentType();
        if (type == null) {
            continue;
        }

        String url = getServletContext().getContextPath() + "/services/spmobile/Attachment";
        url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang=" + language + "&userId="
                + getUserInSession(request).getId();

        if (type.equals("audio/mpeg") || type.equals("audio/ogg") || type.equals("audio/wav")) {
            embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>");
            embed.remove();
        } else if (type.equals("video/mp4") || type.equals("video/ogg") || type.equals("video/webm")) {
            embed.parent()
                    .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />");
            embed.remove();
        }
    }

    // The whole rewritten document is what gets written out.
    OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
    writeContainer(out, doc.outerHtml());
    out.flush();
}

From source file:org.tinymediamanager.scraper.aebn.AebnMetadataProvider.java

/**
 * Get movie meta data from aebn.net./*www  .  j a v  a 2s .  c o  m*/
 *
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("AEBN: getMetadata() {}", options);

    // check if there is already meta data present in the result
    if ((options.getResult() != null) && (options.getResult().getMediaMetadata() != null)) {
        LOGGER.debug("AEBN: return metadata from cache");
        return options.getResult().getMediaMetadata();
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    Elements elements = null;
    Element element = null;
    Integer aebnId = 0;

    // get AebnId from previous search result
    if ((options.getResult() != null) && (options.getResult().getId() != null)) {
        aebnId = Integer.parseInt(options.getResult().getId());
        LOGGER.debug("AEBN: aebnId() from previous search result = {}", aebnId);
        // preset some values from search result (if there is one)
        // Use core.Utils.RemoveSortableName() if you want e.g. "Bourne Legacy, The" -> "The Bourne Legacy".
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE,
                StrgUtils.removeCommonSortableName(options.getResult().getOriginalTitle()));
        md.storeMetadata(MediaMetadata.TITLE,
                StrgUtils.removeCommonSortableName(options.getResult().getTitle()));
    }

    // or get AebnId from options
    if (!isValidAebnId(aebnId) && (options.getId(AEBNID) != null)) {
        LOGGER.debug("AEBN: aebnId() from options = {}", options.getId(AEBNID));
        aebnId = Integer.parseInt(options.getId(AEBNID));
    }

    if (!isValidAebnId(aebnId)) {
        LOGGER.warn("AEBN: no or incorrect aebnId, aborting");
        return md;
    }

    // ID
    md.setId(providerInfo.getId(), aebnId);
    LOGGER.debug("AEBN: aebnId({})", aebnId);

    // Base download url for data scraping
    String downloadUrl = BASE_DATAURL + "/dispatcher/movieDetail?movieId=" + aebnId;
    String locale = options.getLanguage().name();
    if (!StringUtils.isBlank(locale)) {
        downloadUrl = downloadUrl + "&locale=" + locale;
        LOGGER.debug("AEBN: used locale({})", locale);
    }

    // begin download and scrape
    try {
        LOGGER.debug("AEBN: download movie detail page");
        Url url = new Url(downloadUrl);
        InputStream in = url.getInputStream();
        Document document = Jsoup.parse(in, "UTF-8", "");
        in.close();

        // Title
        // <h1 itemprop="name" class="md-movieTitle" >Titelname</h1>
        LOGGER.debug("AEBN: parse title");
        elements = document.getElementsByAttributeValue("class", "md-movieTitle");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieTitle = cleanString(element.text());
            LOGGER.debug("AEBN: title({})", movieTitle);
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // Poster
        // front cover:
        // http://pic.aebn.net/Stream/Movie/Boxcovers/a66568_xlf.jpg
        String posterUrl = BASE_IMGURL + "/Stream/Movie/Boxcovers/a" + aebnId.toString() + "_xlf.jpg";
        md.storeMetadata(MediaMetadata.POSTER_URL, posterUrl);

        // Fanart/Background
        // http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534541.jpg
        // <img class="sceneThumbnail" alt="Scene Thumbnail" title="Scene Thumbnail" onError="..."
        // src="http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534544.jpg" onclick="..." />
        LOGGER.debug("AEBN: parse fanart / scene thumbs");
        elements = document.getElementsByAttributeValue("class", "SceneThumbnail");
        LOGGER.debug("AEBN: {} elements found", elements.size());
        int i = 1;
        for (Element anchor : elements) {
            String backgroundUrl = anchor.attr("src");
            LOGGER.debug("AEBN: backgroundUrl{}({})", i, backgroundUrl);
            md.storeMetadata("backgroundUrl" + Integer.valueOf(i).toString(), backgroundUrl);
            i++;
        }

        // Runtime
        LOGGER.debug("AEBN: parse runtime");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=duration]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)");
            element = elements.first();
            String movieRuntime = cleanString(element.attr("content"));
            movieRuntime = StrgUtils.substr(movieRuntime, "PT(\\d+)M");
            LOGGER.debug("AEBN: runtime({})", movieRuntime);
            md.storeMetadata(MediaMetadata.RUNTIME, movieRuntime);
        }

        // Year
        LOGGER.debug("AEBN: parse year");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=datePublished]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)");
            element = elements.first();
            String movieYear = cleanString(element.attr("content"));
            movieYear = StrgUtils.substr(movieYear, "(\\d+)-");
            LOGGER.debug("AEBN: year({})", movieYear);
            md.storeMetadata(MediaMetadata.YEAR, movieYear);
        }

        // Series (Collection)
        LOGGER.debug("AEBN: parse collection");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[class=series]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieCollection = cleanString(element.text());

            // Fake a TMDB_SET based on the hash value of the collection name
            int movieCollectionHash = movieCollection.hashCode();

            md.storeMetadata(MediaMetadata.COLLECTION_NAME, movieCollection);
            md.storeMetadata(MediaMetadata.TMDB_SET, movieCollectionHash);
            LOGGER.debug("AEBN: collection({}), hashcode({})", movieCollection, movieCollectionHash);
        }

        // Studio
        LOGGER.debug("AEBN: parse studio");
        elements = document.getElementsByAttributeValue("id", "md-details")
                .select("[itemprop=productionCompany]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String movieStudio = cleanString(elements.first().text());
            LOGGER.debug("AEBN: studio({})", movieStudio);
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, movieStudio);
        }

        // Genre
        LOGGER.debug("AEBN: parse genre");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=genre]");
        for (Element g : elements) {
            md.addGenre(getTmmGenre(g.text()));
        }
        // add basic genre, since all genres at AEBN could be summarised
        // into this one
        md.addGenre(MediaGenres.EROTIC);

        // Certification
        // no data scrapeable---but obviously it's adult only, so simply
        // generate it
        String movieCertification = null;
        Certification certification = null;
        String country = options.getCountry().getAlpha2();
        LOGGER.debug("AEBN: generate certification for {}", country);
        // @formatter:off
        if (country.equals("DE")) {
            movieCertification = "FSK 18";
        }
        if (country.equals("US")) {
            movieCertification = "NC-17";
        }
        if (country.equals("GB")) {
            movieCertification = "R18";
        }
        if (country.equals("FR")) {
            movieCertification = "18";
        }
        if (country.equals("ES")) {
            movieCertification = "PX";
        }
        if (country.equals("JP")) {
            movieCertification = "R18+";
        }
        if (country.equals("IT")) {
            movieCertification = "V.M.18";
        }
        if (country.equals("NL")) {
            movieCertification = "16";
        }
        // @formatter:on
        certification = Certification.getCertification(options.getCountry(), movieCertification);
        if (certification != null) {
            LOGGER.debug("AEBN: certification({})", certification);
            md.addCertification(certification);
        }

        // Plot and Tagline
        LOGGER.debug("AEBN: parse plot");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=about]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String moviePlot = cleanString(elements.first().text());
            md.storeMetadata(MediaMetadata.PLOT, moviePlot);
            // no separate tagline available, so extract the first sentence
            // from the movie plot
            String movieTagline = StrgUtils.substr(moviePlot, "^(.*?[.!?:])");
            LOGGER.debug("AEBN: tagline(" + movieTagline + ")");
            md.storeMetadata(MediaMetadata.TAGLINE, movieTagline);
        }

        // Actors
        LOGGER.debug("AEBN: parse actors");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=actor]");
        LOGGER.debug("AEBN: {} actors found", elements.size());
        for (Element anchor : elements) {
            String actorid = StrgUtils.substr(anchor.toString(), "starId=(\\d+)");
            String actorname = cleanString(anchor.select("[itemprop=name]").first().text());
            String actordetailsurl = BASE_DATAURL + anchor.attr("href");
            if (!actorname.isEmpty()) {
                LOGGER.debug("AEBN: add actor id({}), name({}), details({})", actorid, actorname,
                        actordetailsurl);
                MediaCastMember cm = new MediaCastMember();
                cm.setType(MediaCastMember.CastType.ACTOR);
                cm.setName(actorname);
                if (!actorid.isEmpty()) {
                    cm.setId(actorid);
                }

                // Actor detail page
                try {
                    Url starurl = new Url(actordetailsurl);
                    InputStream starurlstream = starurl.getInputStream();
                    Document stardocument = Jsoup.parse(starurlstream, "UTF-8", "");
                    starurlstream.close();
                    Elements elements2 = stardocument.getElementsByAttributeValue("class", "StarInfo");
                    if (elements2.size() == 0) {
                        LOGGER.debug("AEBN: no additional actor details found");
                    } else {
                        // Actor image
                        String actorimage = elements2.select("[itemprop=image]").first().attr("src");
                        LOGGER.debug("AEBN: actor image({})", actorimage);
                        if (!actorimage.isEmpty()) {
                            cm.setImageUrl(actorimage);
                        }
                        // Actor 'fanart' images
                        // unsure if this is ever shown in tmm
                        elements2 = stardocument.getElementsByAttributeValue("class", "StarDetailGallery")
                                .select("a");
                        LOGGER.debug("AEBN: {} gallery images found", elements2.size());
                        for (Element thumbnail : elements2) {
                            LOGGER.debug("AEBN: add fanart image({})", thumbnail.attr("href"));
                            cm.addFanart(thumbnail.attr("href"));
                        }
                    }
                } catch (Exception e) {
                    LOGGER.error("AEBN: Error downloading {}: {}", actordetailsurl, e);
                }

                md.addCastMember(cm);
            }
        }

        // Director
        LOGGER.debug("AEBN: parse director");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=director]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String directorid = StrgUtils.substr(elements.toString(), "directorID=(\\d+)");
            String directorname = cleanString(elements.select("[itemprop=name]").first().text());
            if (!directorname.isEmpty()) {
                MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                cm.setName(directorname);
                if (!directorid.isEmpty()) {
                    cm.setId(directorid);
                }
                cm.setImageUrl("");
                md.addCastMember(cm);
                LOGGER.debug("AEBN: add director id({}), name({})", directorid, directorname);
            }
        }

        // Original Title
        // if we have no original title, just copy the title
        if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
        }
    } catch (Exception e) {
        LOGGER.error("AEBN: Error parsing {}: {}", options.getResult().getUrl(), e);
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

/**
 * Scrapes the metadata of a single movie from IMDb.
 * <p>
 * The "combined" page is always requested from {@code ImdbSiteDefinition.IMDB_COM}; the "plotsummary" page is requested
 * from the configured {@code imdbSite}. Both requests (and, if enabled in the options, a TMDB lookup) are submitted to
 * the executor before the first result is awaited.
 *
 * @param options
 *          the scrape options; supplies the search result, the IMDb id, language, country and the TMDB related flags
 * @return the metadata attached to the search result if there is one; the freshly created, unpopulated metadata object
 *         if no valid IMDb id could be determined; otherwise the metadata filled from the scraped pages
 * @throws Exception
 *           anything thrown while waiting for the workers or while parsing the documents
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() {}", options);
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: {}", options.getResult());
        return options.getResult().getMetadata();
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): {}", imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined) (everytime from akas.imdb.com)
    String combinedUrl = ImdbSiteDefinition.IMDB_COM.getSite() + "title/" + imdbId + "/combined";
    Callable<Document> worker = new ImdbWorker(combinedUrl, options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    String plotsummaryUrl = imdbSite.getSite() + "title/" + imdbId + "/plotsummary";
    worker = new ImdbWorker(plotsummaryUrl, options.getLanguage().name(), options.getCountry().getAlpha2());
    Future<Document> futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    Document doc = futureCombined.get();

    /*
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        // title
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            String movieTitle = cleanString(elements.first().ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            String content = elements.first().text();

            // search year; only the first match carrying a captured year is used
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    md.storeMetadata(MediaMetadata.YEAR, matcher.group(1));
                    break;
                }
            }
        }

        // original title
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            String content = elements.first().text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        // rewrite the size tokens embedded in the image url to request a bigger image
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.parseFloat(matcher.group(1));
                        } catch (NumberFormatException e) {
                            // best effort: keep 0 as rating
                            LOGGER.debug("IMDB: could not parse rating '{}'", matcher.group(1));
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (NumberFormatException e) {
                    // best effort: keep 0 as vote count
                    LOGGER.debug("IMDB: could not parse vote count '{}'", countAsString);
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (NumberFormatException e) {
                            // best effort: keep 0 as position
                            LOGGER.debug("IMDB: could not parse top 250 position '{}'", matcher.group(1));
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // patterns used for every anchor inside the info loop below; compiled once
    Pattern countryPattern = Pattern.compile("/country/(.*)");
    Pattern languagePattern = Pattern.compile("/language/(.*)");
    Pattern certificationPattern = Pattern.compile(".*:(.*)");

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    String releaseDate = cleanString(releaseDateElement.ownText());
                    // group(1) is everything in front of the trailing "(country)" part
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        String rawDate = matcher.group(1);
                        try {
                            Date parsedDate;
                            try {
                                // the markup sample above shows English month names, so do not depend on the JVM default locale
                                parsedDate = new SimpleDateFormat("d MMM yyyy", java.util.Locale.US).parse(rawDate);
                            } catch (Exception e) {
                                // fall back to the default locale in case the page was delivered localized
                                parsedDate = new SimpleDateFormat("d MMM yyyy").parse(rawDate);
                            }
                            SimpleDateFormat sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                            // best effort: an unparseable date simply leaves the release date unset
                            LOGGER.debug("IMDB: could not parse release date '{}'", rawDate);
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    // strip the "»" (&raquo;) which follows the "See more" link in the markup sample above
                    String tagline = cleanString(taglineElement.ownText().replaceAll("\u00BB", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres are only scraped from akas.imdb.com
            // NOTE(review): the heading is matched against imdbSite here while the neighbouring checks use IMDB_COM - confirm this is intended
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element runtimeElement = div.first();
                    // only the first of the "|" separated runtimes is used
                    String first = runtimeElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (NumberFormatException e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                StringBuilder countries = new StringBuilder();
                for (Element anchor : a) {
                    Matcher matcher = countryPattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        if (countries.length() > 0) {
                            countries.append(", ");
                        }
                        // Locale.ROOT: a locale sensitive upper-casing would turn "it" into "\u0130T" on a Turkish JVM
                        countries.append(matcher.group(1).toUpperCase(java.util.Locale.ROOT));
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries.toString());
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                StringBuilder spokenLanguages = new StringBuilder();
                for (Element anchor : a) {
                    Matcher matcher = languagePattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        if (spokenLanguages.length() > 0) {
                            spokenLanguages.append(", ");
                        }
                        spokenLanguages.append(matcher.group(1));
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages.toString());
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        // stop at the first anchor which yields a certification
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="/name/nm0000246/" ...><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" ...>Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> ... </table>
     * 
     * actors without a picture carry an image whose url ends in .../tn15/no_photo.png
     */
    // cast
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            cm.setImageUrl("");
                        } else {
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            // rows without both a name and a character are not added
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    // skip the row containing the heading itself
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            // nextElementSibling() returns null when the header is the last element of its parent
            Element companyList = blackcatheader.nextElementSibling();
            if (companyList == null) {
                continue;
            }
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : companyList.getElementsByTag("a")) {
                if (productionCompanies.length() > 0) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (!zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                String movieTitle = cleanString(elements.first().ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }

    // get data from tmdb? (the future only exists if one of the two options was set when the workers were submitted)
    if (futureTmdb != null && (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo())) {
        MediaMetadata tmdbMd = futureTmdb.get();
        // the TMDB texts are only taken over if TMDB delivered a plot
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMovieParser.java

/**
 * Scrapes the metadata of a single movie from IMDb.
 * <p>
 * The "reference" page is always requested from {@code ImdbSiteDefinition.IMDB_COM}; the "plotsummary" and
 * "releaseinfo" pages are requested from the configured {@code imdbSite}. If the provider config enables
 * {@code useTmdb} or {@code scrapeCollectionInfo}, data from TMDB is merged in afterwards on a best-effort basis:
 * a failing TMDB lookup is logged and otherwise ignored.
 *
 * @param options
 *          the scrape options; supplies the search result, the IMDb id, language and country
 * @return the metadata attached to the search result if there is one; the freshly created, unpopulated metadata object
 *         if no valid IMDb id could be determined; otherwise the metadata filled from the scraped pages
 * @throws Exception
 *           anything thrown while waiting for the IMDb workers or while parsing the documents
 */
MediaMetadata getMovieMetadata(MediaScrapeOptions options) throws Exception {
    MediaMetadata md = new MediaMetadata(providerInfo.getId());

    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMediaMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: {}", options.getResult());
        return options.getResult().getMediaMetadata();
    }

    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): {}", imdbId);
    md.setId(providerInfo.getId(), imdbId);

    // read the TMDB related switches once, so submitting and merging work on the same values
    final boolean useTmdb = ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb");
    final boolean scrapeCollectionInfo = ImdbMetadataProvider.providerInfo.getConfig()
            .getValueAsBool("scrapeCollectionInfo");

    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<>(executor);

    final String language = options.getLanguage().getLanguage();
    final String country = options.getCountry().getAlpha2();

    // worker for imdb request (/reference) (everytime from www.imdb.com)
    String referenceUrl = ImdbSiteDefinition.IMDB_COM.getSite() + "title/" + imdbId + "/reference";
    Callable<Document> worker = new ImdbWorker(referenceUrl, language, country, imdbSite);
    Future<Document> futureReference = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    String plotsummaryUrl = imdbSite.getSite() + "title/" + imdbId + "/plotsummary";
    worker = new ImdbWorker(plotsummaryUrl, language, country, imdbSite);
    Future<Document> futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (useTmdb || scrapeCollectionInfo) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    Document doc = futureReference.get();
    parseReferencePage(doc, options, md);

    /*
     * plot from /plotsummary
     */
    doc = futurePlotsummary.get();
    parsePlotsummaryPage(doc, options, md);

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        Element title = doc.getElementById("tn15title");
        if (title != null) {
            // title
            Elements elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                String movieTitle = cleanString(elements.first().ownText());
                md.setTitle(movieTitle);
            }
        }
    }

    // get the release info page
    String releaseinfoUrl = imdbSite.getSite() + "title/" + imdbId + "/releaseinfo";
    worker = new ImdbWorker(releaseinfoUrl, language, country, imdbSite);
    Future<Document> futureReleaseinfo = compSvcImdb.submit(worker);
    doc = futureReleaseinfo.get();
    // parse original title here!!
    parseReleaseinfoPageAKAs(doc, options, md);

    // did we get a release date?
    if (md.getReleaseDate() == null
            || ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("localReleaseDate")) {
        parseReleaseinfoPage(doc, options, md);
    }

    // get data from tmdb? (the future only exists if one of the two switches was set)
    if (futureTmdb != null) {
        try {
            MediaMetadata tmdbMd = futureTmdb.get();
            if (tmdbMd != null) {
                if (useTmdb) {
                    // tmdbid
                    md.setId(MediaMetadata.TMDB, tmdbMd.getId(MediaMetadata.TMDB));
                    // title
                    if (StringUtils.isNotBlank(tmdbMd.getTitle())) {
                        md.setTitle(tmdbMd.getTitle());
                    }
                    // original title
                    if (StringUtils.isNotBlank(tmdbMd.getOriginalTitle())) {
                        md.setOriginalTitle(tmdbMd.getOriginalTitle());
                    }
                    // tagline
                    if (StringUtils.isNotBlank(tmdbMd.getTagline())) {
                        md.setTagline(tmdbMd.getTagline());
                    }
                    // plot
                    if (StringUtils.isNotBlank(tmdbMd.getPlot())) {
                        md.setPlot(tmdbMd.getPlot());
                    }
                    // collection info
                    if (StringUtils.isNotBlank(tmdbMd.getCollectionName())) {
                        md.setCollectionName(tmdbMd.getCollectionName());
                        md.setId(MediaMetadata.TMDB_SET, tmdbMd.getId(MediaMetadata.TMDB_SET));
                    }
                }
                if (scrapeCollectionInfo) {
                    md.setId(MediaMetadata.TMDB_SET, tmdbMd.getId(MediaMetadata.TMDB_SET));
                    md.setCollectionName(tmdbMd.getCollectionName());
                }
                // inside the null guard: a missing TMDB result must not be handled via a swallowed NullPointerException
                md.setId(tmdbMd.getProviderId(), tmdbMd.getId(tmdbMd.getProviderId()));
            }
        } catch (InterruptedException e) {
            // keep the interrupt visible to the caller; the TMDB data itself stays optional
            Thread.currentThread().interrupt();
            LOGGER.debug("IMDB: interrupted while waiting for TMDB data of {}", imdbId);
        } catch (Exception e) {
            // TMDB data is optional - log and carry on with what IMDb delivered
            LOGGER.debug("IMDB: could not merge TMDB data for {}", imdbId, e);
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getOriginalTitle())) {
        md.setOriginalTitle(md.getTitle());
    }

    // populate id
    md.setId(ImdbMetadataProvider.providerInfo.getId(), imdbId);

    return md;
}