Example usage for org.jsoup.nodes Document getElementsByAttribute

List of usage examples for org.jsoup.nodes Document getElementsByAttribute

Introduction

On this page you can find usage examples for the getElementsByAttribute method of org.jsoup.nodes.Document.

Prototype

public Elements getElementsByAttribute(String key) 

Source Link

Document

Find elements that have a named attribute set.

Usage

From source file:io.knotx.knot.action.domain.FormEntity.java

private static String getAdapterName(Fragment fragment, Document scriptDocument) {
    return Optional.ofNullable(scriptDocument.getElementsByAttribute(FORM_ACTION_ATTR).first())
            .map(element -> element.attr(FORM_ACTION_ATTR)).orElseThrow(() -> {
                LOGGER.error("Could not find action adapter name in fragment [{}].", fragment);
                return new NoSuchElementException("Could not find action adapter name");
            });//from  www.jav a  2 s. co m
}

From source file:eu.riscoss.dataproviders.providers.FossologyDataProvider.java

/**
 * Parses a LicensesCfg file//from  w  ww .j a v  a2s  .  c  o  m
 * @param target
 * @return HashMap: License Types, each with a Collection of Licenses
 * @throws IOException
 */
protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException {
    HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>();
    Document document;
    if (target.startsWith("http")) {
        document = Jsoup.connect(target).get();
    } else {
        File file = new File(target);
        System.out.println("Fossology config file used: " + file.getCanonicalPath());
        document = Jsoup.parse(file, "UTF-8", "http://localhost");
    }

    //        System.out.println(document.outerHtml());

    Elements licensesLinks = document.getElementsByAttribute("id");

    for (Element element : licensesLinks) {
        String licenseName = element.child(0).text();
        if (element.children().size() > 1) {
            String s = element.child(1).text();
            Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*")); //("\\s*\\|\\s*"));

            //xDebug            System.out.println("Analysed license type: "+licenseName+": "+licensesList);
            result.put(licenseName, licensesList);
        }
    }

    return result;
}

From source file:eu.riscoss.rdc.RDCFossology.java

/**
 * Parses a LicensesCfg file/*from   www . j  av  a 2  s  . c  om*/
 * @param target
 * @return HashMap: License Types, each with a Collection of Licenses
 * @throws IOException
 */
protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException {
    HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>();
    Document document;
    if (target.startsWith("http")) {
        document = Jsoup.connect(target).get();
    } else {
        if (target.startsWith("file:"))
            target = target.substring(5);

        //File file = new File(target);

        InputStream in = RDCFossology.class.getResourceAsStream("res/" + target);
        //System.out.println("Fossology config file used: "+file.getPath());
        //System.out.println("Fossology IS file used: "+in.toString());

        document = Jsoup.parse(in, "UTF-8", "http://localhost");

    }

    Elements licensesLinks = document.getElementsByAttribute("id");

    for (Element element : licensesLinks) {
        String licenseName = element.child(0).text();
        if (element.children().size() > 1) {
            String s = element.child(1).text();
            Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*"));

            result.put(licenseName, licensesList);
        }
    }

    return result;
}

From source file:com.geecko.QuickLyric.tasks.IdDecoder.java

@Override
protected Lyrics doInBackground(String... strings) {
    String url = strings[0];//from  www  . j av a2s  .  c o  m
    String artist;
    String track;
    if (url.contains("//www.soundhound.com/")) {
        try { // todo switch to Jsoup
            String html = getUrlAsString(url);
            int preceding = html.indexOf("root.App.trackDa") + 19;
            int following = html.substring(preceding).indexOf(";");
            String data = html.substring(preceding, preceding + following);
            JSONObject jsonData = new JSONObject(data);
            artist = jsonData.getString("artist_display_name");
            track = jsonData.getString("track_name");
        } catch (IOException | JSONException e) {
            e.printStackTrace();
            return new Lyrics(ERROR);
        }

    } else if (url.contains("//shz.am/")) {
        try {
            Document doc = Jsoup.connect(url.trim()).get();
            track = doc.getElementsByAttribute("data-track-title").text();
            artist = doc.getElementsByAttribute("data-track-artist").text();
        } catch (IOException e) {
            e.printStackTrace();
            return new Lyrics(ERROR);
        }
    } else if (url.contains("//play.google.com/store/music/")) {
        String docID = url.substring(url.indexOf("&tid=") + 5);
        try {
            Document doc = Jsoup.connect(url).get();
            Element playCell = doc.getElementsByAttributeValue("data-track-docid", docID).get(0);
            artist = doc.getElementsByClass("primary").text();
            track = playCell.parent().parent().child(1).getElementsByClass("title").text();
        } catch (IOException e) {
            e.printStackTrace();
            return new Lyrics(ERROR);
        }
    } else
        return new Lyrics(ERROR);
    Lyrics res = new Lyrics(Lyrics.SEARCH_ITEM);
    res.setArtist(artist);
    res.setTitle(track);
    return res;
}

From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java

/**
 * Liefert die URL's zu den Vorlagen //  w ww.j  a  v a  2 s .  c om
 * @param scenario Szenario
 * @return
 * @throws Exception Allgemeiner I/O Fehler
 */
public String[] getTemplatesURL(String scenario) throws Exception {
    ArrayList<String> s = new ArrayList<String>();
    HttpGet get = new HttpGet(url + scenario_prefix + scenario + templates);

    HttpResponse response = httpClient.execute(get);

    HttpEntity entity = response.getEntity();
    String pageHTML = EntityUtils.toString(entity);
    EntityUtils.consume(entity);

    Document document = Jsoup.parse(pageHTML);
    Elements elements = document.getElementsByAttribute("href");
    for (Element e : elements) {
        String attr = e.attr("href");
        if (attr.endsWith(".docx") || attr.endsWith(".xlsx") || attr.endsWith(".pptx"))
            s.add(url + scenario_prefix + scenario + templates + attr);
    }
    return (s.toArray(new String[s.size()]));
}

From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java

/**
 * Liefert alle Szenarion URL's /*from w  w  w . j av a2s. c  o  m*/
 * @return 
 * @throws Exception Allgemeiner I/O Fehler
 */
public String[] getScenarios() throws Exception {
    ArrayList<String> s = new ArrayList<String>();
    HttpGet get = new HttpGet(url + scenarios);

    try {
        HttpResponse response = httpClient.execute(get);

        HttpEntity entity = response.getEntity();
        String pageHTML = EntityUtils.toString(entity);
        EntityUtils.consume(entity);

        Document document = Jsoup.parse(pageHTML);
        Elements elements = document.getElementsByAttribute("href");
        for (Element e : elements) {
            if (e.attr("href").startsWith("/szenarien")) {
                String attr = e.attr("href").substring(scenario_prefix.length());
                attr = attr.substring(0, attr.lastIndexOf('/'));
                s.add(attr);
            }
        }
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null,
                "Keine Online Verbindung mglich. Bitte Szenario manuell downloaden, entpacken und bei XMl Model eintragen.",
                "Keine Verbindung zu http://www.hermes.admin.ch", JOptionPane.WARNING_MESSAGE);

    }
    return (s.toArray(new String[s.size()]));
}

From source file:ru.neverdark.yotta.parser.YottaParser.java

private void parse(Array array) {
    final String URL = String.format("http://%s/hierarch.htm", array.getIp());
    final StringBuffer result = new StringBuffer();

    CredentialsProvider credsProvider = new BasicCredentialsProvider();
    credsProvider.setCredentials(new AuthScope(array.getIp(), 80),
            new UsernamePasswordCredentials(array.getUser(), array.getPassword()));
    CloseableHttpClient httpClient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
    try {//  w  w  w  .  ja  v a 2s  .co  m
        HttpGet httpget = new HttpGet(URL);
        CloseableHttpResponse response = httpClient.execute(httpget);
        System.err.printf("%s\t%s\n", array.getIp(), response.getStatusLine());
        try {
            BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent()));

            String line = "";
            while ((line = rd.readLine()) != null) {
                result.append(line);
            }

            Document doc = Jsoup.parse(result.toString());
            Elements tables = doc.getElementsByAttribute("vspace");
            // skip first
            for (int i = 1; i < tables.size(); i++) {
                parseTable(tables.get(i), array.getType());
            }

        } finally {
            response.close();
        }

    } catch (ClientProtocolException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        try {
            httpClient.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

}

From source file:com.iorga.iraj.servlet.AgglomeratorServlet.java

private void parseResource(final ServletConfig config, final String path)
        throws IOException, URISyntaxException {
    //TODO catch the modifications on the path itself
    final URL pathUrl = config.getServletContext().getResource(path);
    long lastModified = pathUrl.openConnection().getLastModified();
    final InputStream targetIS = pathUrl.openStream();
    final Document document = Jsoup.parse(targetIS, "UTF-8", "");
    final Elements elements = document.getElementsByAttribute(ATTRIBUTE_NAME);
    for (final Element element : elements) {
        // each element which defines iraj-agglomerate
        // retrieve the suffix
        final String suffix = element.attr(ATTRIBUTE_NAME);
        final String urlAttribute = element.attr(URL_ATTRIBUTE_ATTRIBUTE_NAME);
        String src = StringUtils.removeEndIgnoreCase(element.attr(urlAttribute), suffix);
        String prefix = "";
        if (!src.startsWith("/")) {
            // this is not an absolute file, let's add the prefix from the given path
            prefix = StringUtils.substringBeforeLast(path, "/") + "/";
            src = prefix + src;//www  .ja  v  a2s. c o  m
        }
        // searching all scripts inside the folder defined by src attribute
        lastModified = searchAndAppendAfter(config, element, src, prefix, suffix, urlAttribute, lastModified);
        // finally remove it
        element.remove();
    }

    caches.put(path, new ParsedResourceCacheEntry(path, document, lastModified));
}

From source file:com.thesmartweb.swebrank.WebParser.java

/**
 * Method to get the various html stats/*from   w  w  w. j av  a 2s .  c o m*/
 * @param link_html the url to analyze
 * @return flag if we got all the stats
 */
public boolean gethtmlstats(String link_html) {
    try {
        Document doc = Jsoup.connect(link_html).timeout(10 * 1000).get();
        Elements schemas = doc.getElementsByAttributeValueContaining("itemtype", "schema.org");
        Elements microdata = doc.getElementsByAttribute("itemtype");
        Elements microformats_vcard = doc.getElementsByAttributeValueContaining("class", "vcard");
        Elements microformats_hreview = doc.getElementsByAttributeValueContaining("class", "hreview");
        Elements microformats_vevent = doc.getElementsByAttributeValueContaining("class", "vevent");
        Elements microformats_vcalendar = doc.getElementsByAttributeValueContaining("class", "vcalendar");
        Elements microformats_vgeo = doc.getElementsByAttributeValueContaining("class", "geo");
        Elements microformats_vadrn = doc.getElementsByAttributeValueContaining("class", "ardn");
        Elements microformats_acquaintance = doc.getElementsByAttributeValueContaining("rel", "link_html");
        Elements microformats_alternate = doc.getElementsByAttributeValueContaining("rel", "alternate");
        Elements microformats_appendix = doc.getElementsByAttributeValueContaining("rel", "appendix");
        Elements microformats_bookmark = doc.getElementsByAttributeValueContaining("rel", "bookmark");
        Elements microformats_chapter = doc.getElementsByAttributeValueContaining("rel", "chapter");
        Elements microformats_child = doc.getElementsByAttributeValueContaining("rel", "child");
        Elements microformats_coll = doc.getElementsByAttributeValueContaining("rel", "colleague");
        Elements microformats_contact = doc.getElementsByAttributeValueContaining("rel", "contact");
        Elements microformats_contents = doc.getElementsByAttributeValueContaining("rel", "contents");
        Elements microformats_copyright = doc.getElementsByAttributeValueContaining("rel", "copyright");
        Elements microformats_coresident = doc.getElementsByAttributeValueContaining("rel", "co-resident");
        Elements microformats_coworker = doc.getElementsByAttributeValueContaining("rel", "co-worker");
        Elements microformats_crush = doc.getElementsByAttributeValueContaining("rel", "crush");
        Elements microformats_date = doc.getElementsByAttributeValueContaining("rel", "date");
        Elements microformats_friend = doc.getElementsByAttributeValueContaining("rel", "friend");
        Elements microformats_glossary = doc.getElementsByAttributeValueContaining("rel", "glossary");
        Elements microformats_help = doc.getElementsByAttributeValueContaining("rel", "help");
        Elements microformats_itsrules = doc.getElementsByAttributeValueContaining("rel", "its-rules");
        Elements microformats_kin = doc.getElementsByAttributeValueContaining("rel", "kin");
        Elements microformats_license = doc.getElementsByAttributeValueContaining("rel", "license");
        Elements microformats_me = doc.getElementsByAttributeValueContaining("rel", "me");
        Elements microformats_met = doc.getElementsByAttributeValueContaining("rel", "met");
        Elements microformats_muse = doc.getElementsByAttributeValueContaining("rel", "muse");
        Elements microformats_neighbor = doc.getElementsByAttributeValueContaining("rel", "neighbor");
        Elements microformats_next = doc.getElementsByAttributeValueContaining("rel", "next");
        Elements microformats_nofollow = doc.getElementsByAttributeValueContaining("rel", "nofollow");
        Elements microformats_parent = doc.getElementsByAttributeValueContaining("rel", "parent");
        Elements microformats_prev = doc.getElementsByAttributeValueContaining("rel", "prev");
        Elements microformats_previous = doc.getElementsByAttributeValueContaining("rel", "previous");
        Elements microformats_section = doc.getElementsByAttributeValueContaining("rel", "section");
        Elements microformats_sibling = doc.getElementsByAttributeValueContaining("rel", "sibling");
        Elements microformats_spouse = doc.getElementsByAttributeValueContaining("rel", "spouse");
        Elements microformats_start = doc.getElementsByAttributeValueContaining("rel", "start");
        Elements microformats_stylesheet = doc.getElementsByAttributeValueContaining("rel", "stylesheet");
        Elements microformats_subsection = doc.getElementsByAttributeValueContaining("rel", "subsection");
        Elements microformats_sweetheart = doc.getElementsByAttributeValueContaining("rel", "sweetheart");
        Elements microformats_tag = doc.getElementsByAttributeValueContaining("rel", "tag");
        Elements microformats_toc = doc.getElementsByAttributeValueContaining("rel", "toc");
        Elements microformats_transformation = doc.getElementsByAttributeValueContaining("rel",
                "transformation");
        Elements microformats_appleti = doc.getElementsByAttributeValueContaining("rel", "apple-touch-icon");
        Elements microformats_appletip = doc.getElementsByAttributeValueContaining("rel",
                "apple-touch-icon-precomposed");
        Elements microformats_appletsi = doc.getElementsByAttributeValueContaining("rel",
                "apple-touch-startup-image");
        Elements microformats_attachment = doc.getElementsByAttributeValueContaining("rel", "attachment");
        Elements microformats_can = doc.getElementsByAttributeValueContaining("rel", "canonical");
        Elements microformats_categ = doc.getElementsByAttributeValueContaining("rel", "category");
        Elements microformats_compon = doc.getElementsByAttributeValueContaining("rel", "component");
        Elements microformats_chromewebi = doc.getElementsByAttributeValueContaining("rel",
                "chrome-webstore-item");
        Elements microformats_disclosure = doc.getElementsByAttributeValueContaining("rel", "disclosure");
        Elements microformats_discussion = doc.getElementsByAttributeValueContaining("rel", "discussion");
        Elements microformats_dns = doc.getElementsByAttributeValueContaining("rel", "dns-prefetch");
        Elements microformats_edit = doc.getElementsByAttributeValueContaining("rel", "edit");
        Elements microformats_edituri = doc.getElementsByAttributeValueContaining("rel", "EditURI");
        Elements microformats_entrycon = doc.getElementsByAttributeValueContaining("rel", "entry-content");
        Elements microformats_external = doc.getElementsByAttributeValueContaining("rel", "external");
        Elements microformats_home = doc.getElementsByAttributeValueContaining("rel", "home");
        Elements microformats_hub = doc.getElementsByAttributeValueContaining("rel", "hub");
        Elements microformats_inreplyto = doc.getElementsByAttributeValueContaining("rel", "in-reply-to");
        Elements microformats_index = doc.getElementsByAttributeValueContaining("rel", "index");
        Elements microformats_indieauth = doc.getElementsByAttributeValueContaining("rel", "indieauth");
        Elements microformats_issues = doc.getElementsByAttributeValueContaining("rel", "issues");
        Elements microformats_lightbox = doc.getElementsByAttributeValueContaining("rel", "lightbox");
        Elements microformats_meta = doc.getElementsByAttributeValueContaining("rel", "meta");
        Elements microformats_openid = doc.getElementsByAttributeValueContaining("rel", "opendid");
        Elements microformats_p3pv1 = doc.getElementsByAttributeValueContaining("rel", "p3pv1");
        Elements microformats_pgpkey = doc.getElementsByAttributeValueContaining("rel", "pgpkey");
        Elements microformats_pingback = doc.getElementsByAttributeValueContaining("rel", "pingback");
        Elements microformats_prerender = doc.getElementsByAttributeValueContaining("rel", "prerender");
        Elements microformats_profile = doc.getElementsByAttributeValueContaining("rel", "profile");
        Elements microformats_rendition = doc.getElementsByAttributeValueContaining("rel", "rendition");
        Elements microformats_service = doc.getElementsByAttributeValueContaining("rel", "service");
        Elements microformats_shortlink = doc.getElementsByAttributeValueContaining("rel", "shortlink");
        Elements microformats_sidebar = doc.getElementsByAttributeValueContaining("rel", "sidebar");
        Elements microformats_sitemap = doc.getElementsByAttributeValueContaining("rel", "sitemap");
        Elements microformats_subresource = doc.getElementsByAttributeValueContaining("rel", "subresource");
        Elements microformats_syndication = doc.getElementsByAttributeValueContaining("rel", "syndication");
        Elements microformats_timesheet = doc.getElementsByAttributeValueContaining("rel", "timesheet");
        Elements microformats_webmention = doc.getElementsByAttributeValueContaining("rel", "webmention");
        Elements microformats_widget = doc.getElementsByAttributeValueContaining("rel", "widget");
        Elements microformats_wlwmanifest = doc.getElementsByAttributeValueContaining("rel", "wlwmanifest");
        Elements microformats_imgsrc = doc.getElementsByAttributeValueContaining("rel", "image_src");
        Elements microformats_cmisacl = doc.getElementsByAttributeValueContaining("rel",
                "http://docs.oasis-open.org/ns/cmis/link/200908/acl");
        Elements microformats_stylesheetless = doc.getElementsByAttributeValueContaining("rel",
                "stylesheet/less");
        Elements microformats_accessibility = doc.getElementsByAttributeValueContaining("rel", "accessibility");
        Elements microformats_biblio = doc.getElementsByAttributeValueContaining("rel", "bibliography");
        Elements microformats_cite = doc.getElementsByAttributeValueContaining("rel", "cite");
        Elements microformats_group = doc.getElementsByAttributeValueContaining("rel", "group");
        Elements microformats_jslicence = doc.getElementsByAttributeValueContaining("rel", "jslicense");
        Elements microformats_longdesc = doc.getElementsByAttributeValueContaining("rel", "longdesc");
        Elements microformats_map = doc.getElementsByAttributeValueContaining("rel", "map");
        Elements microformats_member = doc.getElementsByAttributeValueContaining("rel", "member");
        Elements microformats_source = doc.getElementsByAttributeValueContaining("rel", "source");
        Elements microformats_status = doc.getElementsByAttributeValueContaining("rel", "status");
        Elements microformats_archive = doc.getElementsByAttributeValueContaining("rel", "archive");
        Elements microformats_archives = doc.getElementsByAttributeValueContaining("rel", "archives");
        Elements microformats_comment = doc.getElementsByAttributeValueContaining("rel", "comment");
        Elements microformats_contribution = doc.getElementsByAttributeValueContaining("rel", "contribution");
        Elements microformats_endorsed = doc.getElementsByAttributeValueContaining("rel", "endorsed");
        Elements microformats_fan = doc.getElementsByAttributeValueContaining("rel", "fan");
        Elements microformats_feed = doc.getElementsByAttributeValueContaining("rel", "feed");
        Elements microformats_footnote = doc.getElementsByAttributeValueContaining("rel", "footnote");
        Elements microformats_icon = doc.getElementsByAttributeValueContaining("rel", "icon");
        Elements microformats_kinstyle = doc.getElementsByAttributeValueContaining("rel", "kinetic-stylesheet");
        Elements microformats_prettyphoto = doc.getElementsByAttributeValueContaining("rel", "prettyPhoto");
        Elements microformats_clearbox = doc.getElementsByAttributeValueContaining("rel", "clearbox");
        Elements microformats_made = doc.getElementsByAttributeValueContaining("rel", "made");
        Elements microformats_microsummary = doc.getElementsByAttributeValueContaining("rel", "microsummary");
        Elements microformats_noreferrer = doc.getElementsByAttributeValueContaining("rel", "noreferrer");
        Elements microformats_permalink = doc.getElementsByAttributeValueContaining("rel", "permalink");
        Elements microformats_popover = doc.getElementsByAttributeValueContaining("rel", "popover");
        Elements microformats_prefetch = doc.getElementsByAttributeValueContaining("rel", "prefetch");
        Elements microformats_publickey = doc.getElementsByAttributeValueContaining("rel", "publickey");
        Elements microformats_publisher = doc.getElementsByAttributeValueContaining("rel", "publisher");
        Elements microformats_referral = doc.getElementsByAttributeValueContaining("rel", "referral");
        Elements microformats_related = doc.getElementsByAttributeValueContaining("rel", "related");
        Elements microformats_replies = doc.getElementsByAttributeValueContaining("rel", "replies");
        Elements microformats_resource = doc.getElementsByAttributeValueContaining("rel", "resource");
        Elements microformats_search = doc.getElementsByAttributeValueContaining("rel", "search");
        Elements microformats_sponsor = doc.getElementsByAttributeValueContaining("rel", "sponsor");
        Elements microformats_tooltip = doc.getElementsByAttributeValueContaining("rel", "tooltip");
        Elements microformats_trackback = doc.getElementsByAttributeValueContaining("rel", "trackback");
        Elements microformats_unendorsed = doc.getElementsByAttributeValueContaining("rel", "unendorsed");
        Elements microformats_user = doc.getElementsByAttributeValueContaining("rel", "user");
        Elements microformats_wlw = doc.getElementsByAttributeValueContaining("rel", "wlwmanifest");
        //-----microformats2
        Elements microformats2_hadr = doc.getElementsByAttributeValueContaining("class", "h-adr");
        Elements microformats2_hcard = doc.getElementsByAttributeValueContaining("class", "h-card");
        Elements microformats2_hentry = doc.getElementsByAttributeValueContaining("class", "h-entry");
        Elements microformats2_hevent = doc.getElementsByAttributeValueContaining("class", "h-event");
        Elements microformats2_hgeo = doc.getElementsByAttributeValueContaining("class", "h-geo");
        Elements microformats2_hitem = doc.getElementsByAttributeValueContaining("class", "h-item");
        Elements microformats2_hproduct = doc.getElementsByAttributeValueContaining("class", "h-product");
        Elements microformats2_hrecipe = doc.getElementsByAttributeValueContaining("class", "h-recipe");
        Elements microformats2_hresume = doc.getElementsByAttributeValueContaining("class", "h-resume");
        Elements microformats2_hreview = doc.getElementsByAttributeValueContaining("class", "h-review");
        Elements microformats2_hreviewagg = doc.getElementsByAttributeValueContaining("class",
                "h-review-aggregate");
        Elements foaf_autodiscoveries = doc.getElementsByAttributeValueContaining("href", "foaf");
        Elements foaf_types = doc.getElementsByAttributeValueContaining("type", "foaf");
        Elements media = doc.select("embed");
        Elements iframes = doc.select("iframe");
        Elements script_el = doc.select("script");
        Elements reltags = doc.select("link[rel]");
        Elements reltags_a = doc.select("a[rel]");
        number_embeded_videos = media.size();
        scripts_number = script_el.size();
        frames_number = iframes.size();
        nschem = schemas.size();
        hreln = reltags.size() + reltags_a.size();
        foaf = foaf_autodiscoveries.size() + foaf_types.size();
        micron1 = microformats_cmisacl.size() + microformats_vcard.size() + microformats_vevent.size()
                + microformats_hreview.size() + microformats_vgeo.size() + microformats_vcalendar.size()
                + microformats_vadrn.size() + microformats_acquaintance.size() + microformats_alternate.size()
                + microformats_appendix.size() + microformats_biblio.size() + microformats_bookmark.size()
                + microformats_chapter.size() + microformats_child.size() + microformats_coll.size()
                + microformats_contact.size() + microformats_contents.size() + microformats_copyright.size()
                + microformats_coresident.size() + microformats_coworker.size() + microformats_crush.size()
                + microformats_date.size() + microformats_friend.size() + microformats_glossary.size()
                + microformats_help.size() + microformats_itsrules.size() + microformats_kin.size()
                + microformats_license.size() + microformats_me.size() + microformats_met.size()
                + microformats_muse.size() + microformats_neighbor.size() + microformats_next.size()
                + microformats_nofollow.size() + microformats_parent.size() + microformats_prev.size()
                + microformats_previous.size() + microformats_section.size() + microformats_sibling.size()
                + microformats_spouse.size() + microformats_start.size() + microformats_stylesheet.size()
                + microformats_subsection.size() + microformats_sweetheart.size() + microformats_tag.size()
                + microformats_toc.size() + microformats_transformation.size() + microformats_appleti.size()
                + microformats_appletip.size() + microformats_appletsi.size() + microformats_attachment.size()
                + microformats_can.size() + microformats_categ.size() + microformats_compon.size()
                + microformats_chromewebi.size() + microformats_disclosure.size()
                + microformats_discussion.size() + microformats_dns.size() + microformats_edit.size()
                + microformats_edituri.size() + microformats_entrycon.size() + microformats_external.size()
                + microformats_home.size() + microformats_hub.size() + microformats_inreplyto.size()
                + microformats_index.size() + microformats_indieauth.size() + microformats_issues.size()
                + microformats_lightbox.size() + microformats_meta.size() + microformats_openid.size()
                + microformats_p3pv1.size() + microformats_pgpkey.size() + microformats_pingback.size()
                + microformats_prerender.size() + microformats_profile.size() + microformats_rendition.size()
                + microformats_service.size() + microformats_shortlink.size() + microformats_sidebar.size()
                + microformats_sitemap.size() + microformats_subresource.size()
                + microformats_syndication.size() + microformats_timesheet.size()
                + microformats_webmention.size() + microformats_widget.size() + microformats_wlwmanifest.size()
                + microformats_imgsrc.size() + microformats_imgsrc.size() + microformats_stylesheetless.size()
                + microformats_accessibility.size() + microformats_accessibility.size()
                + microformats_cite.size() + microformats_group.size() + microformats_jslicence.size()
                + microformats_longdesc.size() + microformats_map.size() + microformats_member.size()
                + microformats_source.size() + microformats_status.size() + microformats_archive.size()
                + microformats_archives.size() + microformats_comment.size() + microformats_contribution.size()
                + microformats_endorsed.size() + microformats_fan.size() + microformats_feed.size()
                + microformats_footnote.size() + microformats_icon.size() + microformats_kinstyle.size()
                + microformats_prettyphoto.size() + microformats_clearbox.size() + microformats_made.size()
                + microformats_microsummary.size() + microformats_noreferrer.size()
                + microformats_permalink.size() + microformats_popover.size() + microformats_prefetch.size()
                + microformats_publickey.size() + microformats_publisher.size() + microformats_referral.size()
                + microformats_related.size() + microformats_replies.size() + microformats_resource.size()
                + microformats_search.size() + microformats_sponsor.size() + microformats_tooltip.size()
                + microformats_trackback.size() + microformats_unendorsed.size() + microformats_user.size()
                + microformats_wlw.size() + foaf;
        micron2 = microformats2_hadr.size() + microformats2_hcard.size() + microformats2_hentry.size()
                + microformats2_hevent.size() + microformats2_hgeo.size() + microformats2_hitem.size()
                + microformats2_hproduct.size() + microformats2_hrecipe.size() + microformats2_hresume.size()
                + microformats2_hreview.size() + microformats2_hreviewagg.size();
        total_micron = micron1 + micron2;
        microd = microdata.size();
        return true;
    } catch (IOException | IllegalCharsetNameException ex) {
        Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }

}

From source file:no.kantega.publishing.admin.content.htmlfilter.ContextPathFilter.java

@Override
public Document runFilter(Document document) {
    for (String attribute : attributes) {
        Elements withHref = document.getElementsByAttribute(attribute);
        fixContextPathForAttribute(withHref, attribute);
    }// w w w .j  ava  2 s . c o m
    return document;
}