Example usage for org.jsoup.nodes Document getElementById

List of usage examples for org.jsoup.nodes Document getElementById

Introduction

This page collects usage examples for org.jsoup.nodes Document getElementById.

Prototype

public Element getElementById(String id) 

Source Link

Document

Find an element by ID, including or under this element.

Usage

From source file:me.postar.postarv2.LocalService.java

/**
 * Runs one tracking check: loads the parcel list, fetches the tracking page,
 * reads its three form fields and posts one lookup per parcel whose alarm is on.
 * A notification is raised for every lookup whose response contains the
 * {@code dgInfo} element.
 *
 * <p>Nothing is requested when {@code Functions.isConnectedToInternet} reports
 * no connection. Failed requests and pages without the expected form fields are
 * logged and skipped instead of crashing the service.
 *
 * @param intent  the intent supplied to {@code startService}; not read here
 * @param flags   start flags; not read here
 * @param startId start request id; not read here
 * @return {@code START_NOT_STICKY}
 */
@Override
public int onStartCommand(Intent intent, int flags, int startId) {

    Functions.getParcels(parcels, this);

    PowerManager pm = (PowerManager) getSystemService(Context.POWER_SERVICE);
    wl = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, "Postar");

    if (Functions.isConnectedToInternet(LocalService.this)) {
        Ion.with(this).load("GET", "https://e-racuni.postacg.me/PracenjePosiljaka/").asString().withResponse()
                .setCallback(new FutureCallback<Response<String>>() {
                    @Override
                    public void onCompleted(Exception e, Response<String> result) {
                        // The callback also fires on failure; the response is only usable without an error.
                        if (e != null || result == null || result.getResult() == null) {
                            android.util.Log.w("Postar", "Could not load the tracking page", e);
                            return;
                        }

                        Document html = Jsoup.parse(result.getResult());
                        Element viewState = html.getElementById("__VIEWSTATE");
                        Element eventValidation = html.getElementById("__EVENTVALIDATION");
                        Element btnPronadji = html.getElementById("btnPronadji");

                        // getElementById returns null when the id is absent from the page.
                        if (viewState == null || eventValidation == null || btnPronadji == null) {
                            android.util.Log.w("Postar", "Tracking page is missing expected form fields");
                            return;
                        }

                        for (final PostParcel parcel : parcels) {
                            if (parcel.isAlarmOn()) {
                                requestParcelStatus(parcel, viewState.val(), eventValidation.val(),
                                        btnPronadji.val());
                            }
                        }
                    }
                });
    }

    return START_NOT_STICKY;
}

/** Posts the tracking form for one parcel and raises a notification when the response has a result table. */
private void requestParcelStatus(final PostParcel parcel, String viewState, String eventValidation,
        String searchButton) {
    Ion.with(LocalService.this)
            .load("POST", "https://e-racuni.postacg.me/PracenjePosiljaka/")
            .setBodyParameter("__VIEWSTATE", viewState)
            .setBodyParameter("__EVENTVALIDATION", eventValidation)
            .setBodyParameter("btnPronadji", searchButton)
            .setBodyParameter("txtPrijemniBroj", parcel.getParcelNo()).asString()
            .withResponse().setCallback(new FutureCallback<Response<String>>() {
                @Override
                public void onCompleted(Exception e, final Response<String> result) {
                    if (e != null || result == null || result.getResult() == null) {
                        android.util.Log.w("Postar", "Could not query parcel status", e);
                        return;
                    }

                    Document html = Jsoup.parse(result.getResult());
                    if (html.getElementById("dgInfo") != null) {
                        notifyParcelFound(parcel);
                    }
                }
            });
}

/** Shows the status notification for the parcel, then stops the service and releases the wake lock if held. */
private void notifyParcelFound(PostParcel parcel) {
    NotificationCompat.Builder mBuilder = new NotificationCompat.Builder(LocalService.this)
            .setSmallIcon(R.drawable.ic_mail_outline)
            .setLargeIcon(BitmapFactory.decodeResource(getResources(), R.drawable.ic_mail_outline))
            .setAutoCancel(true);
    mBuilder.setContentTitle(getString(R.string.message_title));
    mBuilder.setContentText(getString(R.string.message_content));

    Intent activityIntent = new Intent(LocalService.this, StatusActivity.class);
    activityIntent.putExtra("parcel", parcel);
    PendingIntent resultPendingIntent = PendingIntent.getActivity(LocalService.this, 0, activityIntent,
            PendingIntent.FLAG_UPDATE_CURRENT);
    mBuilder.setContentIntent(resultPendingIntent);

    NotificationManager mNotificationManager = (NotificationManager) getSystemService(
            Context.NOTIFICATION_SERVICE);
    mNotificationManager.notify(12, mBuilder.build());

    stopSelf();

    // This runs once per matching parcel and no acquire() is visible in this method;
    // releasing a lock that is not held throws, so only release when it is held.
    if (wl != null && wl.isHeld()) {
        wl.release();
    }
}

From source file:org.apache.nutch.protocol.httpclient.HttpFormAuthentication.java

/**
 * Builds the POST parameters for the configured login form.
 *
 * <p>The form is looked up by {@code id} first and by {@code name} second. Every
 * {@code input} of the form contributes its name/value pair unless its name is a
 * key of the configured login post data or is listed among the removed form
 * fields; the configured login post data entries are appended last.
 *
 * @param pageContent HTML of the page that contains the login form
 * @return the parameters to submit
 * @throws IllegalArgumentException if no form matches the configured id or name
 */
private List<NameValuePair> getLoginFormParams(String pageContent) throws UnsupportedEncodingException {
    final String formId = authConfigurer.getLoginFormId();
    final Document page = Jsoup.parse(pageContent);

    Element form = page.getElementById(formId);
    if (form == null) {
        LOG.debug("No form element found with 'id' = {}, trying 'name'.", formId);
        form = page.select("form[name=" + formId + "]").first();
    }
    if (form == null) {
        LOG.debug("No form element found with 'name' = {}", formId);
        throw new IllegalArgumentException("No form exists: " + formId);
    }

    final List<NameValuePair> formParams = new ArrayList<NameValuePair>();

    // Page-provided inputs, minus the ones that are overridden or explicitly removed.
    for (Element input : form.getElementsByTag("input")) {
        final String name = input.attr("name");
        final String fieldValue = input.attr("value");
        final boolean overridden = authConfigurer.getLoginPostData().containsKey(name);
        if (!overridden && !authConfigurer.getRemovedFormFields().contains(name)) {
            formParams.add(new NameValuePair(name, fieldValue));
        }
    }

    // Configured login data goes last.
    for (Entry<String, String> configured : authConfigurer.getLoginPostData().entrySet()) {
        formParams.add(new NameValuePair(configured.getKey(), configured.getValue()));
    }
    return formParams;
}

From source file:org.apache.nutch.protocol.httpclient.proxy.HttpFormAuthentication.java

/**
 * Collects the parameters to post to the configured login form.
 *
 * <p>The form is resolved by {@code id}, falling back to a {@code name} match.
 * Inputs whose name is a key of the configured login post data, or is listed in
 * the removed form fields, are left out; the configured login post data is then
 * appended after the remaining inputs.
 *
 * @param pageContent HTML of the page that contains the login form
 * @return the parameters to submit
 * @throws IllegalArgumentException if no form matches the configured id or name
 */
private List<NameValuePair> getLoginFormParams(String pageContent) throws UnsupportedEncodingException {
    final String formId = authConfigurer.getLoginFormId();
    final Document page = Jsoup.parse(pageContent);

    Element form = page.getElementById(formId);
    if (form == null) {
        LOGGER.debug("No form element found with 'id' = {}, trying 'name'.", formId);
        form = page.select("form[name=" + formId + "]").first();
    }
    if (form == null) {
        LOGGER.debug("No form element found with 'name' = {}", formId);
        throw new IllegalArgumentException("No form exists: " + formId);
    }

    final List<NameValuePair> formParams = new ArrayList<NameValuePair>();

    // Keep only the inputs that are neither overridden nor removed by configuration.
    for (Element input : form.getElementsByTag("input")) {
        final String name = input.attr("name");
        final String fieldValue = input.attr("value");
        final boolean overridden = authConfigurer.getLoginPostData().containsKey(name);
        if (!overridden && !authConfigurer.getRemovedFormFields().contains(name)) {
            formParams.add(new NameValuePair(name, fieldValue));
        }
    }

    // Append the configured login data after the page-provided fields.
    for (Entry<String, String> configured : authConfigurer.getLoginPostData().entrySet()) {
        formParams.add(new NameValuePair(configured.getKey(), configured.getValue()));
    }
    return formParams;
}

From source file:org.cellcore.code.engine.page.extractor.starcity.STCPageDataExtractor.java

/**
 * Returns the text of the {@code custom_card_name_STC} element, or {@code null}
 * when the document has no such element.
 *
 * <p>The part of the base URI after its last {@code =} is handed to
 * {@code jsonProc} together with the document before the element is looked up.
 *
 * @param doc the parsed card page
 * @return the card name, or {@code null} if the element is absent
 * @throws UnsupportedCardException if an {@code h3} containing "Foil" is present
 */
@Override
protected String getName(Document doc) throws UnsupportedCardException {
    final boolean foil = !doc.select("h3").select(":contains(Foil)").isEmpty();
    if (foil) {
        throw new UnsupportedCardException("foil");
    }

    final String pageUri = doc.baseUri();
    final int codeStart = pageUri.lastIndexOf("=") + 1;
    jsonProc(pageUri.substring(codeStart), doc);

    final String nameId = "custom_card_name_STC";
    return doc.getElementById(nameId) != null ? doc.getElementById(nameId).text() : null;
}

From source file:org.cellcore.code.engine.page.extractor.starcity.STCPageDataExtractor.java

/**
 * Parses the text of the {@code custom_card_price_STC} element as a float.
 *
 * @param doc the parsed card page
 * @return the parsed price, or {@code -1} when the element is absent
 */
@Override
protected float getPrice(Document doc) {
    final String priceId = "custom_card_price_STC";
    final boolean missing = doc.getElementById(priceId) == null;
    return missing ? -1 : Float.parseFloat(doc.getElementById(priceId).text());
}

From source file:org.craftercms.social.migration.controllers.MainController.java

/**
 * Renders the {@link UserLogEntry} items of the log table into the HTML logger
 * template and writes the resulting document to {@code writer}.
 *
 * <p>Each entry becomes a {@code tr} (source, date, message) appended to the
 * template element with id {@code logs}. The writer is flushed on success and is
 * closed in every case, including when rendering fails.
 *
 * <p>NOTE(review): no statement visible here throws {@code TransformerException};
 * it is kept in the signature so existing callers keep compiling.
 *
 * @param writer destination for the rendered HTML; closed by this method
 * @throws IOException if the template resource cannot be found, read or written
 */
protected void getHtml(final FileWriter writer) throws TransformerException, IOException {
    try {
        final String templatePath = MigrationTool.systemProperties
                .getString("crafter.migration.loggerTemplate");
        final URL in = getClass().getResource(templatePath);
        if (in == null) {
            // Fail here with a clear message instead of letting the null URL reach IOUtils.
            log.error("Unable to find {}", templatePath);
            throw new IOException("Unable to find logger template: " + templatePath);
        }

        final Document loggingDoc = Jsoup.parse(IOUtils.toString(in));
        final Element logs = loggingDoc.getElementById("logs");

        // NOTE(review): "hh" is the 12-hour field and the pattern has no AM/PM marker — confirm this is intended.
        final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy MM dd hh:mm:ss zzz");

        for (Object o : logTable.getItems()) {
            if (o instanceof UserLogEntry) {
                final UserLogEntry userLogEntry = (UserLogEntry) o;

                final Element tr = loggingDoc.createElement("tr");
                tr.attr("class", userLogEntry.getLevel().getCssClass());

                final Element tmigrator = loggingDoc.createElement("td");
                tmigrator.text(userLogEntry.getSource());

                final Element tdate = loggingDoc.createElement("td");
                tdate.text(dateFormatter.format(userLogEntry.getDate()));

                final Element tmessage = loggingDoc.createElement("td");
                tmessage.attr("class", "text-center");
                tmessage.text(userLogEntry.getMessage());

                tr.appendChild(tmigrator);
                tr.appendChild(tdate);
                tr.appendChild(tmessage);
                logs.appendChild(tr);
            }
        }

        IOUtils.write(loggingDoc.toString(), writer);
        writer.flush();
    } finally {
        writer.close();
    }
}

From source file:org.dataconservancy.ui.it.UiConfigurationActionBeanIT.java

/**
 * Ensures that an XSD schema, composed of a single schema document (no &lt;xsd:include&gt; statements), can be
 * added to the system.
 *
 * <p>The test posts a new metadata format, checks the schema name, version and namespaces shown on the
 * returned page, saves the format, and finally verifies that the list of formats grew by exactly the
 * added entry.
 *
 * @throws Exception
 */
@Test
public void testAddMavenPomMetadataFormat() throws Exception {
    // Get a count of the current number of metadata formats in the system
    List<UiConfigurationActionBean.MetaDataFormatTransport> mdfts = getMdfs();
    int mdfCount = mdfts.size();

    // Compose the mdft to add
    final UiConfigurationActionBean.MetaDataFormatTransport mdft = new UiConfigurationActionBean()
            .getNewMetadataFormatTransport();
    final AddMetadataFormatRequest req = new AddMetadataFormatRequest(urlConfig);
    // A unique name ensures that this Metadata Format doesn't exist yet in the system (but we verify this
    // assumption anyway)
    final String name = UUID.randomUUID().toString();
    final boolean validates = true;
    final String version = this.getClass().getSimpleName() + " Maven 4.0.0 POM";
    final boolean appliesToCollection = false;
    final boolean appliesToProject = false;
    final boolean appliesToItem = true;
    final List<String> disciplineIds = Arrays.asList("dc:discipline:Biology");
    mdft.setName(name);
    mdft.setVersion(version);
    mdft.setSchemaURL(MAVEN_MODEL_4_0_0_SCHEMA_URL);
    mdft.setSchemaSource(MAVEN_MODEL_4_0_0_SCHEMA_URL);
    mdft.setValidates(validates);
    mdft.setAppliesToCollection(appliesToCollection);
    mdft.setAppliesToProject(appliesToProject);
    mdft.setAppliesToItem(appliesToItem);
    mdft.setDisciplineIds(disciplineIds);

    // Ensure that the new Metadata Format being added isn't in the list of existing metadata formats
    assertFalse(mdfts.contains(mdft));

    // Add the metadata format
    HttpAssert.ResponseHolder holder = new HttpAssert.ResponseHolder();

    HttpAssert.assertStatus(hc, req.asHttpPost(mdft), 200, holder);

    final String html = IOUtils.toString(holder.getBody());
    assertNotNull(html);
    final Document dom = Jsoup.parse(html);
    assertNotNull(dom);
    Element nameElement = dom.getElementById("schemaName");
    assertNotNull(nameElement);
    assertTrue(nameElement.text().equalsIgnoreCase("Schema Name: " + name));

    Element versionElement = dom.getElementById("schemaVersion");
    assertNotNull(versionElement);
    assertTrue(versionElement.text().equalsIgnoreCase("Version: " + version));

    // Fail with an assertion (not a NullPointerException) when the namespaces container is missing
    Element namespacesElement = dom.getElementById("namespaces");
    assertNotNull(namespacesElement);

    Elements namespaceElements = namespacesElement.children();
    boolean foundPrefixedNamespace = false;
    boolean foundNamespace = false;
    for (Element namespaceElement : namespaceElements) {
        String namespaceText = namespaceElement.text();
        if (namespaceText.contains("Namespace:")) {
            if (namespaceText.contains("Prefix")) {
                assertTrue(namespaceText
                        .equalsIgnoreCase("Namespace: http://www.w3.org/2001/XMLSchema Prefix: xs"));
                foundPrefixedNamespace = true;
            } else {
                assertTrue(namespaceText.equalsIgnoreCase("Namespace: http://maven.apache.org/POM/4.0.0"));
                foundNamespace = true;
            }
        }
    }

    assertTrue(foundPrefixedNamespace);
    assertTrue(foundNamespace);

    // Now we need to persist the format in the system by emulating a click on the "save" button
    HttpAssert.assertStatus(hc, new SaveMetadataFormatRequest(urlConfig).asHttpPost(), 200);

    // Ensure that the format we've added was added properly (all the values for table columns were
    // persisted properly)
    mdfts = getMdfs();
    assertTrue(mdfts.contains(mdft));
    assertEquals(mdfCount + 1, mdfts.size());
}

From source file:org.fcrepo.apix.integration.LoaderIT.java

/**
 * Submits a service URI through the loader's HTML form, then checks that the
 * loaded extension is exposed on a newly deposited object and that invoking it
 * returns the canned service body.
 */
@Test
public void htmlMinimalTest() throws Exception {

    final String expectedBody = "BODY";

    // Canned responses for the OPTIONS request and the service invocation.
    final String optionsTurtle = IOUtils
            .toString(testResource("objects/options_LoaderIT_minimal.ttl").representation(), "utf8");
    optionsResponse.set(optionsTurtle);
    serviceResponse.set(expectedBody);

    // Fetch the loader page, fill in the service endpoint and submit the form.
    final Document loaderPage = attempt(60,
            () -> Jsoup.connect(LOADER_URI).method(Method.GET).timeout(1000).execute().parse());
    final FormElement uriForm = (FormElement) loaderPage.getElementById("uriForm");
    uriForm.getElementById("uri").val(serviceEndpoint);

    final Response submitResponse = uriForm.submit().ignoreHttpErrors(true).followRedirects(false).execute();
    update();

    assertEquals("OPTIONS", requestToService.getHeader(Exchange.HTTP_METHOD));
    assertEquals(303, submitResponse.statusCode());
    assertNotNull(submitResponse.header("Location"));

    // Verify that the extension works: deposit an object through the
    // intercept/proxy URI of a fedora container ...
    final URI containerUri = routing.of(REQUEST_URI).interceptUriFor(objectContainer);
    final URI depositedUri = client.post(containerUri).slug("LoaderIT_htmlMinimalTest")
            .body(IOUtils.toInputStream("<> a <test:LoaderIT#minimal> .", "utf8"), "text/turtle").perform()
            .getLocation();

    // ... look up its service discovery document ...
    final URI discoveryUri = client.options(depositedUri).perform().getLinkHeaders("service").get(0);

    // ... and invoke the "minimal" service, expecting the canned body back.
    final String actualBody = attempt(10,
            () -> IOUtils.toString(
                    client.get(serviceEndpoints(discoveryUri).get(SERVICE_MINIMAL)).perform().getBody(),
                    "utf8"));
    assertEquals(expectedBody, actualBody);
}

From source file:org.loklak.api.search.MeetupsCrawlerService.java

/**
 * Downloads a Meetup group page and extracts its Open Graph metadata, its
 * description, its topic links and its recent meetups into a single JSON object.
 *
 * <p>Recent meetups are read from the {@code p} elements under
 * {@code #recentMeetups}, three consecutive paragraphs per meetup (date/time,
 * attendance/review, information). Every complete group of three is reported; a
 * trailing incomplete group is ignored. Elements that are missing from the page
 * produce empty values instead of a {@code NullPointerException}.
 *
 * @param url address of the Meetup group page
 * @return a thought whose data array holds the one result object, or an empty
 *         data array when the page could not be fetched
 */
public static SusiThought crawlMeetups(String url) {

    SusiThought json = new SusiThought();

    Document meetupHTML;
    try {
        // NOTE(review): the user agent string contains a stray ')'; left unchanged — confirm whether intended.
        meetupHTML = Jsoup.connect(url).userAgent("Mozilla)").get();
    } catch (Exception e) {
        e.printStackTrace();
        // Without a document there is nothing to extract; report an empty result.
        json.setData(new JSONArray());
        return json;
    }

    JSONObject result = new JSONObject();
    result.put("group_name", meetupMetaContent(meetupHTML, "og:title"));
    result.put("meetup_type", meetupMetaContent(meetupHTML, "og:type"));

    Element groupDescription = meetupHTML.getElementById("groupDesc");
    result.put("group_description", groupDescription == null ? "" : groupDescription.text());

    result.put("group_locality", meetupMetaContent(meetupHTML, "og:locality"));
    result.put("group_country_code", meetupMetaContent(meetupHTML, "og:country-name"));
    result.put("group_latitude", meetupMetaContent(meetupHTML, "og:latitude"));
    result.put("group_longitude", meetupMetaContent(meetupHTML, "og:longitude"));
    result.put("group_imageLink", meetupMetaContent(meetupHTML, "og:image"));

    JSONArray groupTopics = new JSONArray();
    for (Element topic : meetupDescendants(meetupHTML, "topic-box-2012", "a")) {
        groupTopics.put(topic.text());
    }
    result.put("group_topics", groupTopics);

    // Three consecutive paragraphs describe one meetup; numbering starts at 1.
    Elements paragraphs = meetupDescendants(meetupHTML, "recentMeetups", "p");
    JSONArray recentMeetups = new JSONArray();
    for (int first = 0; first + 2 < paragraphs.size(); first += 3) {
        JSONObject obj = new JSONObject();
        obj.put("recent_meetup_number", first / 3 + 1);
        obj.put("date_time", paragraphs.get(first).text());
        obj.put("attendance", paragraphs.get(first + 1).text());
        obj.put("information", paragraphs.get(first + 2).text());
        recentMeetups.put(obj);
    }
    result.put("recent_meetups", recentMeetups);

    JSONArray meetupsCrawlerResultArray = new JSONArray();
    meetupsCrawlerResultArray.put(result);

    json.setData(meetupsCrawlerResultArray);
    return json;
}

/** Returns the {@code content} attribute of the first element whose {@code property} attribute equals the given value. */
private static String meetupMetaContent(Document page, String property) {
    return page.getElementsByAttributeValue("property", property).attr("content");
}

/** Returns the elements with the given tag under the element with the given id, or an empty list when the id is absent. */
private static Elements meetupDescendants(Document page, String id, String tag) {
    Element container = page.getElementById(id);
    return container == null ? new Elements() : container.getElementsByTag(tag);
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult());
        return options.getResult().getMetadata();
    }//from   w  w w.j a  v  a2s .c o  m

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined) (everytime from akas.imdb.com)
    // StringBuilder sb = new StringBuilder(imdbSite.getSite());
    StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/combined");
    Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    Future<Document> futurePlotsummary = null;
    sb = new StringBuilder(imdbSite.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/plotsummary");

    worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2());
    futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    Document doc;
    doc = futureCombined.get();

    /*
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        Element element = null;
        // title
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            element = elements.first();
            String movieTitle = cleanString(element.ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();

            // search year
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    String movieYear = matcher.group(1);
                    md.storeMetadata(MediaMetadata.YEAR, movieYear);
                    break;
                }
            }
        }

        // original title
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.valueOf(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (Exception e) {
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", ""));
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        try {
                            SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy");
                            Date parsedDate = sdf.parse(matcher.group(1));
                            sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres are only scraped from akas.imdb.com
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }
            // }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime
            // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String first = taglineElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                String countries = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/country/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String country = matcher.group(1);
                        if (StringUtils.isNotEmpty(countries)) {
                            countries += ", ";
                        }
                        countries += country.toUpperCase();
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries);
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                String spokenLanguages = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/language/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String langu = matcher.group(1);
                        if (StringUtils.isNotEmpty(spokenLanguages)) {
                            spokenLanguages += ", ";
                        }
                        spokenLanguages += langu;
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages);
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        Pattern certificationPattern = Pattern.compile(".*:(.*)");
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick=
     * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src=
     * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a
     * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td
     * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a
     * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick=
     * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick=
     * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick=
     * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table>
     */
    // cast
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            cm.setImageUrl("");
                        } else {
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            Elements a = blackcatheader.nextElementSibling().getElementsByTag("a");
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : a) {
                if (StringUtils.isNotEmpty(productionCompanies)) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    // build the url
    doc = null;
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (zebraList != null && !zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            Element element = null;
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                element = elements.first();
                String movieTitle = cleanString(element.ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }
    // }

    // get data from tmdb?
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        MediaMetadata tmdbMd = futureTmdb.get();
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}