Example usage for org.jsoup.nodes Document getElementsByAttributeValueContaining

List of usage examples for org.jsoup.nodes Document getElementsByAttributeValueContaining

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.getElementsByAttributeValueContaining.

Prototype

public Elements getElementsByAttributeValueContaining(String key, String match) 

Source Link

Document

Find elements that have attributes whose value contains the match string.

Usage

From source file:ph.fingra.statisticsweb.service.DashBoardServiceImpl.java

/**
 * Resolves and persists the small icon URL of an app that does not have one yet.
 *
 * <p>Returns immediately when the app already has a small icon or has no valid app id.
 * An {@code AppInfo} carrying the app key is created when the app has none. For
 * {@code AppPlatform.IPHONE} the icon is taken from the {@code artworkUrl60} field of the
 * iTunes lookup response; for every other platform it is the {@code src} of the image
 * inside the first {@code cover-container} element of the Google Play details page.
 *
 * <p>The app info is written through {@code appDao} only when an icon URL was found; any
 * missing or unexpected piece of the remote response leaves the stored data untouched.
 *
 * @param app the app whose small icon should be looked up
 */
private void checkAppIcon(App app) {
    boolean alreadyHasIcon = app.getAppInfo() != null && app.getAppInfo().getSmallicon() != null;
    if (alreadyHasIcon || !app.hasValidAppId()) {
        return;
    }
    if (app.getAppInfo() == null) {
        AppInfo appInfo = new AppInfo();
        appInfo.setAppkey(app.getAppkey());
        app.setAppInfo(appInfo);
    }

    String iconUrl;
    if (AppPlatform.valueOf(app.getPlatform()) == AppPlatform.IPHONE) {
        ResponseEntity<String> response = restTemplate
                .getForEntity("https://itunes.apple.com/lookup?id={appId}", String.class, app.getAppid());
        if (response.getStatusCode() != HttpStatus.OK || response.getBody() == null) {
            return;
        }
        JsonObject result = (JsonObject) new JsonParser().parse(response.getBody());
        JsonPrimitive resultCount = result.getAsJsonPrimitive("resultCount");
        JsonArray results = result.getAsJsonArray("results");
        // Only an unambiguous single hit is trusted; absent members mean "no icon", not a crash.
        if (resultCount == null || resultCount.getAsInt() != 1 || results == null || results.size() == 0) {
            return;
        }
        JsonPrimitive artwork = results.get(0).getAsJsonObject().getAsJsonPrimitive("artworkUrl60");
        if (artwork == null) {
            return;
        }
        iconUrl = artwork.getAsString();
    } else {
        try {
            Document page = Jsoup.connect("https://play.google.com/store/apps/details?id=" + app.getAppid())
                    .get();
            Elements covers = page.getElementsByAttributeValueContaining("class", "cover-container");
            if (covers.isEmpty()) {
                return;
            }
            iconUrl = covers.get(0).getElementsByTag("img").attr("src");
        } catch (IOException e) {
            // NOTE(review): no logger is visible in this snippet; route this through the
            // project's logging facility instead of stderr.
            e.printStackTrace();
            return;
        }
        // An empty src would be stored as a non-null icon and block every later retry.
        if (iconUrl.isEmpty()) {
            return;
        }
    }

    app.getAppInfo().setSmallicon(iconUrl);
    appDao.updateAppInfo(app.getAppInfo());
}

From source file:com.mycompany.parcinghtml.ParsingClassPlayers.java

/**
 * Downloads the roster pages for seasons 2015 down to 2005 and inserts every player that
 * has not been seen yet into the {@code PLAYERS} table.
 *
 * <p>For each season page, up to the first three elements whose {@code class} contains
 * {@code soupiska} are processed; the index of that element is stored as the player's
 * {@code POSITION}. Rows are the descendants whose {@code id} contains {@code soupiska}.
 * A player is identified by the text of the row's third child; repeated names are skipped.
 * {@code PLAYERID} is a running counter starting at 1.
 *
 * <p>If a page cannot be downloaded the import stops at that season and the method returns.
 *
 * @throws SQLException if a connection cannot be obtained or an insert fails; the
 *                      original exception is propagated unchanged
 */
public void downloadSource() throws SQLException {
    String sql = "INSERT INTO PLAYERS(NAME,AGE,HEIGHT,WEIGHT,PLAYERNUM,POSITION,PLAYERID) VALUES(?,?,?,?,?,?,?)";
    ArrayList<String> seenNames = new ArrayList<>();
    int playerID = 1;

    for (int season = 2015; season > 2004; season--) {
        Document doc;
        try {
            doc = Jsoup.connect("http://www.hcsparta.cz/soupiska.asp?sezona=" + season).get();
        } catch (IOException e) {
            System.out.println(e.getMessage());
            System.out.println("doc is null");
            return;
        }

        Elements sections = doc.getElementsByAttributeValueContaining("class", "soupiska");
        // A page may contain fewer than three roster sections; never index past the end.
        int sectionCount = Math.min(3, sections.size());
        if (sectionCount == 0) {
            continue;
        }

        // One connection and one statement per page instead of one per player row.
        // SQLExceptions propagate as-is so their stack trace, SQLState and error code survive.
        try (Connection conn = ds.getConnection(); PreparedStatement st = conn.prepareStatement(sql)) {
            for (int position = 0; position < sectionCount; position++) {
                Elements rows = sections.get(position).getElementsByAttributeValueContaining("id", "soupiska");

                for (Element row : rows) {
                    String name = row.child(2).text();
                    if (seenNames.contains(name)) {
                        continue;
                    }
                    seenNames.add(name);

                    st.setString(1, name);
                    st.setInt(2, parseLeadingInt(row.child(4).text()));
                    st.setInt(3, parseLeadingInt(row.child(5).text()));
                    st.setInt(4, parseLeadingInt(row.child(6).text()));
                    try {
                        st.setInt(5, Integer.parseInt(row.child(0).text()));
                    } catch (NumberFormatException ex) {
                        // The first cell is not always a number; store 0 in that case.
                        st.setInt(5, 0);
                    }
                    st.setInt(6, position);
                    st.setInt(7, playerID);
                    st.executeUpdate();
                    playerID++;
                }
            }
        }
    }
}

/** Parses the integer before the first space of {@code text} (the whole text if it has no space). */
private static int parseLeadingInt(String text) {
    return Integer.parseInt(text.split(" ")[0]);
}

From source file:com.thesmartweb.swebrank.WebParser.java

/**
 * Downloads a page and counts its structured-data and embedding markers.
 *
 * <p>On success the following fields are updated: {@code number_embeded_videos}
 * ({@code embed} elements), {@code scripts_number}, {@code frames_number},
 * {@code nschem} (elements whose {@code itemtype} contains {@code schema.org}),
 * {@code hreln} ({@code link[rel]} plus {@code a[rel]}), {@code foaf}, {@code micron1}
 * (classic microformat classes, {@code rel} values and the foaf count), {@code micron2}
 * (microformats2 classes), {@code total_micron} and {@code microd} (elements with an
 * {@code itemtype} attribute).
 *
 * <p>Every value is looked up once. All lookups are substring matches, so overlapping
 * values (for example {@code prev} and {@code previous}) still count the same element
 * more than once.
 *
 * @param link_html the url to analyze
 * @return {@code true} if the page was fetched and all stats were computed, {@code false}
 *         if the download failed
 */
public boolean gethtmlstats(String link_html) {
    // NOTE(review): "ardn" is kept as in the original; it looks like a typo (possibly "adr") - confirm.
    final String[] microformatClasses = { "vcard", "vevent", "hreview", "geo", "vcalendar", "ardn" };
    final String[] relValues = { "http://docs.oasis-open.org/ns/cmis/link/200908/acl", "acquaintance",
            "alternate", "appendix", "bibliography", "bookmark", "chapter", "child", "colleague", "contact",
            "contents", "copyright", "co-resident", "co-worker", "crush", "date", "friend", "glossary", "help",
            "its-rules", "kin", "license", "me", "met", "muse", "neighbor", "next", "nofollow", "parent",
            "prev", "previous", "section", "sibling", "spouse", "start", "stylesheet", "subsection",
            "sweetheart", "tag", "toc", "transformation", "apple-touch-icon", "apple-touch-icon-precomposed",
            "apple-touch-startup-image", "attachment", "canonical", "category", "component",
            "chrome-webstore-item", "disclosure", "discussion", "dns-prefetch", "edit", "EditURI",
            "entry-content", "external", "home", "hub", "in-reply-to", "index", "indieauth", "issues",
            "lightbox", "meta", "openid", "p3pv1", "pgpkey", "pingback", "prerender", "profile", "rendition",
            "service", "shortlink", "sidebar", "sitemap", "subresource", "syndication", "timesheet",
            "webmention", "widget", "wlwmanifest", "image_src", "stylesheet/less", "accessibility", "cite",
            "group", "jslicense", "longdesc", "map", "member", "source", "status", "archive", "archives",
            "comment", "contribution", "endorsed", "fan", "feed", "footnote", "icon", "kinetic-stylesheet",
            "prettyPhoto", "clearbox", "made", "microsummary", "noreferrer", "permalink", "popover",
            "prefetch", "publickey", "publisher", "referral", "related", "replies", "resource", "search",
            "sponsor", "tooltip", "trackback", "unendorsed", "user" };
    final String[] microformats2Classes = { "h-adr", "h-card", "h-entry", "h-event", "h-geo", "h-item",
            "h-product", "h-recipe", "h-resume", "h-review", "h-review-aggregate" };

    try {
        Document doc = Jsoup.connect(link_html).timeout(10 * 1000).get();

        int schemaCount = doc.getElementsByAttributeValueContaining("itemtype", "schema.org").size();
        int microdataCount = doc.getElementsByAttribute("itemtype").size();
        int classicCount = countAttributeContaining(doc, "class", microformatClasses)
                + countAttributeContaining(doc, "rel", relValues);
        int microformats2Count = countAttributeContaining(doc, "class", microformats2Classes);
        int foafCount = doc.getElementsByAttributeValueContaining("href", "foaf").size()
                + doc.getElementsByAttributeValueContaining("type", "foaf").size();

        number_embeded_videos = doc.select("embed").size();
        scripts_number = doc.select("script").size();
        frames_number = doc.select("iframe").size();
        nschem = schemaCount;
        hreln = doc.select("link[rel]").size() + doc.select("a[rel]").size();
        foaf = foafCount;
        micron1 = classicCount + foaf;
        micron2 = microformats2Count;
        total_micron = micron1 + micron2;
        microd = microdataCount;
        return true;
    } catch (IOException | IllegalCharsetNameException ex) {
        Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }

}

/**
 * Sums, over all {@code fragments}, the number of elements in {@code doc} whose
 * {@code attribute} value contains that fragment.
 */
private static int countAttributeContaining(Document doc, String attribute, String[] fragments) {
    int total = 0;
    for (String fragment : fragments) {
        total += doc.getElementsByAttributeValueContaining(attribute, fragment).size();
    }
    return total;
}

From source file:com.quarterfull.newsAndroid.NewsDetailFragment.java

/**
 * Shows a details dialog for the image or link under a context-menu request on a WebView.
 *
 * <p>Only {@code IMAGE_TYPE}, {@code SRC_IMAGE_ANCHOR_TYPE} and {@code SRC_ANCHOR_TYPE} hits are
 * handled; every other hit type, a non-WebView view, a missing hit result or a missing hit
 * target is ignored. The document parsed from {@code html} is searched for the element
 * whose {@code src} (images) or {@code href} (links) contains the hit target:
 * <ul>
 *   <li>images: the dialog title is the last path segment of the image URL and the text is
 *       the element's {@code title}, falling back to its {@code alt};</li>
 *   <li>links: the dialog text is the combined text of the matching elements.</li>
 * </ul>
 * Targets that are not valid URLs are ignored, as are image targets that do not start
 * with {@code http} or {@code file}.
 *
 * @param menu     the context menu being built (not modified here)
 * @param v        the view the context menu was requested for
 * @param menuInfo extra information about the request (unused)
 */
public void onCreateContextMenu(ContextMenu menu, View v, ContextMenu.ContextMenuInfo menuInfo) {
    if (!(v instanceof WebView)) {
        return;
    }
    WebView.HitTestResult result = ((WebView) v).getHitTestResult();
    if (result == null) {
        return;
    }
    int type = result.getType();
    String target = result.getExtra();
    if (target == null) {
        return;
    }

    if (type == WebView.HitTestResult.IMAGE_TYPE || type == WebView.HitTestResult.SRC_IMAGE_ANCHOR_TYPE) {
        if (!target.startsWith("http") && !target.startsWith("file")) {
            return;
        }
        URL mImageUrl;
        try {
            mImageUrl = new URL(target);
        } catch (MalformedURLException e) {
            return;
        }

        Elements imgtag = Jsoup.parse(html).getElementsByAttributeValueContaining("src", target);
        // No matching element means empty title/alt; checked explicitly instead of catching an NPE.
        String imgtitle = imgtag.isEmpty() ? "" : imgtag.first().attr("title");
        String imgaltval = imgtag.isEmpty() ? "" : imgtag.first().attr("alt");

        String title = target.substring(target.lastIndexOf('/') + 1);
        int titleIcon = android.R.drawable.ic_menu_gallery;
        String text = imgtitle.isEmpty() ? imgaltval : imgtitle;

        // Create and show the dialog.
        FragmentTransaction ft = getFragmentManager().beginTransaction();
        DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceImage(title, titleIcon, text,
                mImageUrl);
        newFragment.show(ft, "menu_fragment_dialog");
    } else if (type == WebView.HitTestResult.SRC_ANCHOR_TYPE) {
        // Validate the URL first so an empty or malformed target never reaches the document query.
        URL mUrl;
        try {
            mUrl = new URL(target);
        } catch (MalformedURLException e) {
            return;
        }
        String text = Jsoup.parse(html).getElementsByAttributeValueContaining("href", target).text();

        // Create and show the dialog.
        FragmentTransaction ft = getFragmentManager().beginTransaction();
        DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceUrl(text, mUrl.toString());
        newFragment.show(ft, "menu_fragment_dialog");
    }
}

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Scrapes the metadata of a single movie from ofdb.de.
 * <p>
 * The detail page url is resolved from (in this order) the provider id stored in the options, the
 * url of a previous search result, or a fresh search by IMDb id. From the detail page the IMDb id,
 * title, year, original title, genres and rating are read; the plot and the cast are fetched from
 * two further pages. Failures while loading the plot or the cast page are logged and do not abort
 * the scrape.
 *
 * @param options
 *          the scrape options; must describe a movie
 * @return the metadata collected from the pages
 * @throws UnsupportedMediaTypeException
 *           if the options do not describe a movie
 * @throws Exception
 *           if no detail url could be determined, the detail page looks invalid, or loading or
 *           parsing the detail page failed
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());

    if (options.getType() != MediaType.MOVIE) {
        throw new UnsupportedMediaTypeException(options.getType());
    }

    // we have 3 entry points here
    // a) getMetadata has been called with an ofdbId
    // b) getMetadata has been called with an imdbId
    // c) getMetadata has been called from a previous search

    String detailUrl = "";

    // case a) and c): an explicit ofdb id wins over the url of a previous search result
    if (StringUtils.isNotBlank(options.getId(getProviderInfo().getId()))) {
        detailUrl = "http://www.ofdb.de/view.php?page=film&fid=" + options.getId(getProviderInfo().getId());
    } else if (options.getResult() != null) {
        detailUrl = options.getResult().getUrl();
    }

    // case b)
    if (options.getResult() == null && StringUtils.isNotBlank(options.getId(MediaMetadata.IMDB))) {
        MediaSearchOptions searchOptions = new MediaSearchOptions(MediaType.MOVIE);
        searchOptions.setImdbId(options.getId(MediaMetadata.IMDB));
        try {
            List<MediaSearchResult> results = search(searchOptions);
            if (results != null && !results.isEmpty()) {
                options.setResult(results.get(0));
                detailUrl = options.getResult().getUrl();
            }
        } catch (Exception e) {
            LOGGER.warn("failed IMDB search: " + e.getMessage());
        }
    }

    // we can only work further if we got a search result on ofdb.de
    if (StringUtils.isBlank(detailUrl)) {
        throw new Exception("We did not get any useful movie url");
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    // generic Elements used all over
    Elements el = null;
    // the ofdb id is embedded in the url either as "film/<id>," or as "fid=<id>"
    String ofdbId = StrgUtils.substr(detailUrl, "film\\/(\\d+),");
    if (StringUtils.isBlank(ofdbId)) {
        ofdbId = StrgUtils.substr(detailUrl, "fid=(\\d+)");
    }

    Url url;
    try {
        LOGGER.trace("get details page");
        url = new Url(detailUrl);
        InputStream in = url.getInputStream();
        Document doc;
        try {
            doc = Jsoup.parse(in, "UTF-8", "");
        } finally {
            // close the stream even when parsing fails
            in.close();
        }

        if (doc.getAllElements().size() < 10) {
            throw new Exception("meh - we did not receive a valid web page");
        }

        // parse details

        // IMDB ID "http://www.imdb.com/Title?1194173"
        el = doc.getElementsByAttributeValueContaining("href", "imdb.com");
        if (!el.isEmpty()) {
            String imdbNumber = StrgUtils.substr(el.first().attr("href"), "\\?(\\d+)");
            // do not store a bare "tt" when the link carries no numeric id
            if (StringUtils.isNotBlank(imdbNumber)) {
                md.setId(MediaMetadata.IMDB, "tt" + imdbNumber);
            }
        }

        // title / year
        // <meta property="og:title" content="Bourne Vermächtnis, Das (2012)" />
        el = doc.getElementsByAttributeValue("property", "og:title");
        if (!el.isEmpty()) {
            String[] ty = parseTitle(el.first().attr("content"));
            md.setTitle(StrgUtils.removeCommonSortableName(ty[0]));
            try {
                md.setYear(Integer.parseInt(ty[1]));
            } catch (Exception ignored) {
                // no parsable year in the title; the fallback below is tried next
            }
        }
        // another year position
        if (md.getYear() == 0) {
            // <a href="view.php?page=blaettern&Kat=Jahr&Text=2012">2012</a>
            el = doc.getElementsByAttributeValueContaining("href", "Kat=Jahr");
            try {
                md.setYear(Integer.parseInt(el.first().text()));
            } catch (Exception ignored) {
                // no year link or not a number; the year simply stays unset
            }
        }

        // original title (has to be searched with a regexp)
        // <tr valign="top">
        // <td nowrap=""><font class="Normal" face="Arial,Helvetica,sans-serif"
        // size="2">Originaltitel:</font></td>
        // <td>&nbsp;&nbsp;</td>
        // <td width="99%"><font class="Daten" face="Arial,Helvetica,sans-serif"
        // size="2"><b>Brave</b></font></td>
        // </tr>
        String originalTitle = StrgUtils.substr(doc.body().html(), "(?s)Originaltitel.*?<b>(.*?)</b>");
        if (!originalTitle.isEmpty()) {
            md.setOriginalTitle(StrgUtils.removeCommonSortableName(originalTitle));
        }

        // Genre: <a href="view.php?page=genre&Genre=Action">Action</a>
        el = doc.getElementsByAttributeValueContaining("href", "page=genre");
        for (Element g : el) {
            md.addGenre(getTmmGenre(g.text()));
        }

        // rating
        // <div itemtype="http://schema.org/AggregateRating" itemscope
        // itemprop="aggregateRating">Note: <span
        // itemprop="ratingValue">6.73</span><meta
        // itemprop="worstRating" content="1" />
        el = doc.getElementsByAttributeValue("itemprop", "ratingValue");
        if (!el.isEmpty()) {
            String r = el.text();
            if (!r.isEmpty()) {
                try {
                    md.setRating(Float.parseFloat(r));
                } catch (Exception e) {
                    LOGGER.debug("could not parse rating");
                }
            }
        }

        // get PlotLink; open url and parse
        // <a href="plot/22523,31360,Die-Bourne-Identitt"><b>[mehr]</b></a>
        LOGGER.trace("parse plot");
        el = doc.getElementsByAttributeValueMatching("href", "plot\\/\\d+,");
        if (!el.isEmpty()) {
            String plotUrl = BASE_URL + "/" + el.first().attr("href");
            try {
                url = new Url(plotUrl);
                in = url.getInputStream();
                Document plot;
                try {
                    plot = Jsoup.parse(in, "UTF-8", "");
                } finally {
                    in.close();
                }
                // first Blocksatz is plot
                Elements block = plot.getElementsByClass("Blocksatz");
                if (!block.isEmpty()) {
                    String p = block.first().text(); // remove all html stuff
                    // remove the "... Mal gelesen" header, but only when it is really there;
                    // indexOf() yields -1 otherwise and the plot would lose its first characters
                    int headerPos = p.indexOf("Mal gelesen");
                    if (headerPos >= 0) {
                        p = p.substring(Math.min(p.length(), headerPos + 12));
                    }
                    md.setPlot(p);
                }
            } catch (Exception e) {
                LOGGER.error("failed to get plot page: " + e.getMessage());
            }
        }

        // http://www.ofdb.de/view.php?page=film_detail&fid=226745
        LOGGER.debug("parse actor detail");
        String movieDetail = BASE_URL + "/view.php?page=film_detail&fid=" + ofdbId;
        doc = null;
        try {
            url = new Url(movieDetail);
            in = url.getInputStream();
            try {
                doc = Jsoup.parse(in, "UTF-8", "");
            } finally {
                in.close();
            }
        } catch (Exception e) {
            LOGGER.error("failed to get detail page: " + e.getMessage());
        }

        if (doc != null) {
            parseCast(doc.getElementsContainingOwnText("Regie"), MediaCastMember.CastType.DIRECTOR, md);
            parseCast(doc.getElementsContainingOwnText("Darsteller"), MediaCastMember.CastType.ACTOR, md);
            parseCast(doc.getElementsContainingOwnText("Stimme/Sprecher"), MediaCastMember.CastType.ACTOR, md);
            parseCast(doc.getElementsContainingOwnText("Synchronstimme (deutsch)"),
                    MediaCastMember.CastType.ACTOR, md);
            parseCast(doc.getElementsContainingOwnText("Drehbuchautor(in)"), MediaCastMember.CastType.WRITER,
                    md);
            parseCast(doc.getElementsContainingOwnText("Produzent(in)"), MediaCastMember.CastType.PRODUCER, md);
        }
    } catch (Exception e) {
        LOGGER.error("Error parsing " + detailUrl);
        throw e;
    }

    return md;
}

From source file:org.tinymediamanager.scraper.zelluloid.ZelluloidMetadataProvider.java

/**
 * Scrapes the metadata of a movie from zelluloid.de, based on a previous search result.
 * <p>
 * Title, original title and year are preset from the search result. Plot, poster, year, release
 * date, original title, runtime, genres, certification and rating are read from the main page;
 * cast, crew and production company from the details page; the IMDb id from the links page.
 * Failures while loading the details or links page are logged (and the cached file is removed)
 * without aborting the scrape.
 *
 * @param options
 *          the scrape options; must carry the result of a prior search
 * @return the metadata collected from the pages
 * @throws Exception
 *           if the options carry no search result, or loading or parsing the main page failed
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());

    // we can only work further if we got a search result on zelluloid.de
    if (options.getResult() == null) {
        throw new Exception("Scrape with Zelluloid.de without prior search is not supported");
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    // generic Elements used all over
    Elements el = null;
    // preset values from searchresult (if we have them); the original title is stored once,
    // in its cleaned form (a second, raw store used to overwrite the cleaned value)
    md.storeMetadata(MediaMetadata.ORIGINAL_TITLE,
            Utils.removeSortableName(options.getResult().getOriginalTitle()));
    md.storeMetadata(MediaMetadata.TITLE, Utils.removeSortableName(options.getResult().getTitle()));
    md.storeMetadata(MediaMetadata.YEAR, options.getResult().getYear());

    String id = "";
    if (StringUtils.isEmpty(options.getResult().getId())) {
        // a trailing reluctant group "(.*?)" always captures the empty string,
        // so capture the digits explicitly
        id = StrgUtils.substr(options.getResult().getUrl(), "id=(\\d+)");
    } else {
        id = options.getResult().getId();
    }

    String detailurl = options.getResult().getUrl();
    if (StringUtils.isEmpty(detailurl)) {
        detailurl = BASE_URL + "/filme/index.php3?id=" + id;
    }

    Url url;
    try {
        LOGGER.debug("get details page");
        url = new CachedUrl(detailurl);
        InputStream in = url.getInputStream();
        Document doc;
        try {
            doc = Jsoup.parse(in, PAGE_ENCODING, "");
        } finally {
            // close the stream even when parsing fails
            in.close();
        }

        // parse plot; the tagline is the plot cut to at most 150 characters
        String plot = doc.getElementsByAttributeValue("class", "bigtext").text();
        md.storeMetadata(MediaMetadata.PLOT, plot);
        md.storeMetadata(MediaMetadata.TAGLINE, plot.length() > 150 ? plot.substring(0, 150) : plot);

        // parse poster
        el = doc.getElementsByAttributeValueStarting("src", "/images/poster");
        if (el.size() == 1) {
            md.storeMetadata(MediaMetadata.POSTER_URL, BASE_URL + el.get(0).attr("src"));
        }

        // parse year
        if (StringUtils.isEmpty(md.getStringValue(MediaMetadata.YEAR))) {
            el = doc.getElementsByAttributeValueContaining("href", "az.php3?j=");
            if (el.size() == 1) {
                md.storeMetadata(MediaMetadata.YEAR, el.get(0).text());
            }
        }

        // parse cinema release (dd.MM.yyyy on the page, stored as yyyy-MM-dd)
        el = doc.getElementsByAttributeValueContaining("href", "?v=w");
        if (el.size() > 0) {
            try {
                SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy");
                Date d = sdf.parse(el.get(0).text());
                sdf = new SimpleDateFormat("yyyy-MM-dd");
                md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(d));
            } catch (Exception e) {
                LOGGER.warn("cannot parse cinema release date: " + el.get(0).text());
            }
        }

        // parse original title; fall back to the title when the page has none either
        if (StringUtils.isEmpty(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE,
                    StrgUtils.substr(doc.toString(), "Originaltitel: (.*?)\\<"));
        }
        if (StringUtils.isEmpty(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
        }

        // parse runtime
        String rt = (StrgUtils.substr(doc.toString(), "ca.&nbsp;(.*?)&nbsp;min"));
        if (!rt.isEmpty()) {
            try {
                md.storeMetadata(MediaMetadata.RUNTIME, Integer.valueOf(rt));
            } catch (Exception e2) {
                LOGGER.warn("cannot convert runtime: " + rt);
            }
        }

        // parse genres; the genre id is whatever follows the last '=' of the link
        el = doc.getElementsByAttributeValueContaining("href", "az.php3?g=");
        for (Element g : el) {
            String href = g.attr("href");
            String gid = href.substring(href.lastIndexOf('=') + 1);
            md.addGenre(getTmmGenre(gid));
        }

        // parse cert
        // FSK: ab 12, $230 Mio. Budget
        String fsk = StrgUtils.substr(doc.toString(), "FSK: (.*?)[,<]");
        if (!fsk.isEmpty()) {
            md.addCertification(Certification.findCertification(fsk));
        }

        // parse rating
        Elements ratings = doc.getElementsByAttributeValue("class", "ratingBarTable");
        if (ratings.size() == 2) { // get user rating
            Element e = ratings.get(1);
            // <div>87%</div>
            String r = e.getElementsByTag("div").text().replace("%", "");
            try {
                md.storeMetadata(MediaMetadata.RATING, Double.valueOf(r) / 10); // only 0-10
            } catch (Exception e2) {
                LOGGER.warn("cannot convert rating: " + r);
            }
        }

        // details page
        doc = null;
        String detailsUrl = BASE_URL + "/filme/details.php3?id=" + id;
        try {
            url = new CachedUrl(detailsUrl);
            in = url.getInputStream();
            try {
                doc = Jsoup.parse(in, PAGE_ENCODING, "");
            } finally {
                in.close();
            }
        } catch (Exception e) {
            LOGGER.error("failed to get details: " + e.getMessage());

            // clear cache
            CachedUrl.removeCachedFileForUrl(detailsUrl);
        }

        // getElementById() returns null when the table is missing; skip the cast in that case
        // instead of failing the whole scrape with a NullPointerException
        Element tab = (doc != null) ? doc.getElementById("ccdetails") : null;
        if (tab != null) {
            // section the current rows belong to: 1 cast, 2 crew, 3 production,
            // 4 distributor, 5 alternative titles
            int header = 0;
            String lastRole = "";
            for (Element tr : tab.getElementsByTag("tr")) {
                String row = tr.toString();
                if (row.contains("dyngfx")) { // header gfx
                    if (row.contains("Besetzung")) {
                        header = 1;
                    } else if (row.contains("Crew")) {
                        header = 2;
                    } else if (row.contains("Produktion")) {
                        header = 3;
                    } else if (row.contains("Verleih")) {
                        header = 4;
                    } else if (row.contains("Alternativtitel")) {
                        header = 5;
                    }
                    continue;
                }

                // no header gfx, so data
                el = tr.getElementsByTag("td");
                if (header == 1) {
                    // actors
                    if (el.size() == 2) {
                        MediaCastMember mcm = new MediaCastMember();
                        mcm.setCharacter(el.get(0).text());
                        mcm.setName(el.get(1).getElementsByTag("a").text());
                        mcm.setId(StrgUtils.substr(el.get(1).getElementsByTag("a").attr("href"),
                                "id=(\\d+)"));
                        mcm.setType(MediaCastMember.CastType.ACTOR);
                        md.addCastMember(mcm);
                        // TODO: parse actor detail pages :/
                    }
                } else if (header == 2) {
                    // crew
                    if (el.size() == 2) {
                        MediaCastMember mcm = new MediaCastMember();
                        String crewrole = el.get(0).html().trim();
                        mcm.setName(el.get(1).getElementsByTag("a").text());
                        // an empty role cell means "same role as the row above"
                        if (crewrole.equals("&nbsp;")) {
                            mcm.setPart(lastRole);
                        } else {
                            mcm.setPart(crewrole);
                            lastRole = crewrole;
                        }
                        if (crewrole.equals("Regie")) {
                            mcm.setType(MediaCastMember.CastType.DIRECTOR);
                        } else if (crewrole.equals("Drehbuch")) {
                            mcm.setType(MediaCastMember.CastType.WRITER);
                        } else {
                            mcm.setType(MediaCastMember.CastType.OTHER);
                        }
                        mcm.setId(StrgUtils.substr(el.get(1).getElementsByTag("a").attr("href"),
                                "id=(\\d+)"));
                        md.addCastMember(mcm);
                    }
                } else if (header == 3) {
                    // production; rows without any cell are skipped
                    if (!el.isEmpty()) {
                        md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, el.get(0).text());
                    }
                }
            }
        }

        // get links page
        doc = null;
        String linksUrl = BASE_URL + "/filme/links.php3?id=" + id;
        try {
            url = new CachedUrl(linksUrl);
            in = url.getInputStream();
            try {
                doc = Jsoup.parse(in, PAGE_ENCODING, "");
            } finally {
                in.close();
            }
        } catch (Exception e) {
            LOGGER.error("failed to get links page: " + e.getMessage());

            // clear cache
            CachedUrl.removeCachedFileForUrl(linksUrl);
        }

        if (doc != null) {
            el = doc.getElementsByAttributeValueContaining("href", "german.imdb.com");
            if (el.size() > 0) {
                String href = el.get(0).attr("href");
                // either a complete "tt1234567" id or only the number after '?'
                String imdb = StrgUtils.substr(href, "(tt\\d{7})");
                if (imdb.isEmpty()) {
                    String number = StrgUtils.substr(href, "\\?(\\d+)");
                    if (!number.isEmpty()) {
                        imdb = "tt" + number;
                    }
                }
                // do not store a bare "tt" when the link carries no id at all
                if (!imdb.isEmpty()) {
                    md.setId(MediaMetadata.IMDBID, imdb);
                }
            }
        }
    } catch (Exception e) {
        LOGGER.error("Error parsing " + options.getResult().getUrl());

        // clear cache
        CachedUrl.removeCachedFileForUrl(detailurl);

        throw e;
    }

    return md;
}

From source file:org.tinymediamanager.scraper.zelluloid.ZelluloidMetadataProvider.java

/**
 * Searches zelluloid.de for movies.
 * <p>
 * The search term is taken from the QUERY parameter or, if that is empty, from the TITLE
 * parameter. When the returned page contains no hit links and its title does not contain
 * "Suche nach", it is treated as a redirect to a detail page and turned into a single result.
 * Otherwise every hit link becomes a result; links pointing to the same movie id are merged.
 * Only query/title searches are supported, so this method never has an IMDb id to match against
 * and every result is scored by comparing its title with the search term.
 *
 * @param options
 *          the search options
 * @return the results, sorted by their natural ordering and then reversed; empty when no search
 *         term was given or the search page could not be loaded
 * @throws Exception
 *           declared by the overridden method; e.g. raised when the search term cannot be
 *           url-encoded
 */
@Override
public List<MediaSearchResult> search(MediaSearchOptions options) throws Exception {
    LOGGER.debug("search() " + options.toString());
    List<MediaSearchResult> resultList = new ArrayList<MediaSearchResult>();
    String searchUrl = "";
    String searchTerm = "";

    // only title search
    if (StringUtils.isNotEmpty(options.get(MediaSearchOptions.SearchParam.QUERY))) {
        searchTerm = cleanSearch(options.get(MediaSearchOptions.SearchParam.QUERY));
        searchUrl = BASE_URL + "/suche/index.php3?qstring=" + URLEncoder.encode(searchTerm, "UTF-8");
        LOGGER.debug("search for everything: " + searchTerm);
    } else if (StringUtils.isNotEmpty(options.get(MediaSearchOptions.SearchParam.TITLE))) {
        searchTerm = cleanSearch(options.get(MediaSearchOptions.SearchParam.TITLE));
        searchUrl = BASE_URL + "/suche/index.php3?qstring=" + URLEncoder.encode(searchTerm, "UTF-8");
        LOGGER.debug("search with title: " + searchTerm);
    } else {
        LOGGER.debug("empty searchString");
        return resultList;
    }

    // from here on the term is only used for logging and scoring
    searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm);

    Document doc = null;
    try {
        Url url = new CachedUrl(searchUrl);
        InputStream in = url.getInputStream();
        try {
            doc = Jsoup.parse(in, PAGE_ENCODING, "");
        } finally {
            // close the stream even when parsing fails
            in.close();
        }
    } catch (Exception e) {
        LOGGER.error("failed to search for " + searchTerm + ": " + e.getMessage());

        // clear cache
        CachedUrl.removeCachedFileForUrl(searchUrl);
    }

    if (doc == null) {
        return resultList;
    }

    // only look for movie links
    Elements filme = doc.getElementsByAttributeValueStarting("href", "hit.php");
    LOGGER.debug("found " + filme.size() + " search results");
    if (filme.isEmpty()) {
        if (!doc.getElementsByTag("title").text().contains("Suche nach")) {
            // redirected to detail page
            MediaSearchResult msr = new MediaSearchResult(providerInfo.getId());
            Elements el = doc.getElementsByAttributeValueStarting("href", "index.php3?id=");
            if (el.size() > 0) {
                msr.setId(StrgUtils.substr(el.get(0).attr("href"), "id=(\\d+)"));
            }
            msr.setTitle(StrgUtils.substr(doc.getElementsByTag("title").text(), "(.*?)\\|").trim());
            el = doc.getElementsByAttributeValueContaining("href", "az.php3?j=");
            if (el.size() == 1) {
                msr.setYear(el.get(0).text());
            }
            resultList.add(msr);
        }
        return resultList;
    }

    // <a
    // href="hit.php3?hit=d6900d7d9baf66ba77d8e59cc425da9e-movie-7614-17114331-1"
    // class="normLight">Avatar - Aufbruch nach Pandora</B>
    // <nobr>(2009)</nobr><br /><span class="smallLight"
    // style="color:#ccc;">Avatar</span></a>

    // map to merge 2 results :/
    Map<String, MediaSearchResult> res = new HashMap<String, MediaSearchResult>();

    for (Element a : filme) {
        try {
            String id = StrgUtils.substr(a.attr("href"), "-movie-(.*?)-");
            if (StringUtils.isEmpty(id)) {
                // a hit link without a movie id cannot be scraped later on; without this check
                // all such links would be merged into one bogus result with an empty id
                LOGGER.debug("skipping hit without movie id: " + a.attr("href"));
                continue;
            }

            MediaSearchResult sr = res.get(id);
            if (sr != null) {
                LOGGER.debug("dupe found; merging with previous searchresult");
            } else {
                sr = new MediaSearchResult(providerInfo.getId());
            }

            if (StringUtils.isEmpty(sr.getId())) {
                sr.setId(id);
            }
            if (StringUtils.isEmpty(sr.getTitle())) {
                // with a <nobr> year inside the link, only the link's own text is the title
                if (a.html().contains("nobr")) {
                    sr.setTitle(a.ownText());
                } else {
                    sr.setTitle(a.text());
                }
            }
            LOGGER.debug("found movie " + sr.getTitle());
            if (StringUtils.isEmpty(sr.getOriginalTitle())) {
                sr.setOriginalTitle(a.getElementsByTag("span").text());
            }
            if (StringUtils.isEmpty(sr.getYear())) {
                // any 4 digit
                sr.setYear(StrgUtils.substr(a.getElementsByTag("nobr").text(), ".*(\\d{4}).*"));
            }
            sr.setMediaType(MediaType.MOVIE);
            sr.setUrl(BASE_URL + "/filme/index.php3?id=" + id);

            // compare score based on names
            sr.setScore(MetadataUtil.calculateScore(searchTerm, sr.getTitle()));

            // populate extra args
            MetadataUtil.copySearchQueryToSearchResult(options, sr);
            res.put(id, sr);
        } catch (Exception e) {
            LOGGER.warn("error parsing movie result: " + e.getMessage());
        }
    }

    resultList.addAll(res.values());
    Collections.sort(resultList);
    Collections.reverse(resultList);
    return resultList;
}