List of usage examples for org.jsoup.nodes Document getElementsByAttribute
public Elements getElementsByAttribute(String key)
From source file:io.knotx.knot.action.domain.FormEntity.java
private static String getAdapterName(Fragment fragment, Document scriptDocument) { return Optional.ofNullable(scriptDocument.getElementsByAttribute(FORM_ACTION_ATTR).first()) .map(element -> element.attr(FORM_ACTION_ATTR)).orElseThrow(() -> { LOGGER.error("Could not find action adapter name in fragment [{}].", fragment); return new NoSuchElementException("Could not find action adapter name"); });//from www.jav a 2 s. co m }
From source file:eu.riscoss.dataproviders.providers.FossologyDataProvider.java
/** * Parses a LicensesCfg file//from w ww .j a v a2s . c o m * @param target * @return HashMap: License Types, each with a Collection of Licenses * @throws IOException */ protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException { HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>(); Document document; if (target.startsWith("http")) { document = Jsoup.connect(target).get(); } else { File file = new File(target); System.out.println("Fossology config file used: " + file.getCanonicalPath()); document = Jsoup.parse(file, "UTF-8", "http://localhost"); } // System.out.println(document.outerHtml()); Elements licensesLinks = document.getElementsByAttribute("id"); for (Element element : licensesLinks) { String licenseName = element.child(0).text(); if (element.children().size() > 1) { String s = element.child(1).text(); Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*")); //("\\s*\\|\\s*")); //xDebug System.out.println("Analysed license type: "+licenseName+": "+licensesList); result.put(licenseName, licensesList); } } return result; }
From source file:eu.riscoss.rdc.RDCFossology.java
/** * Parses a LicensesCfg file/*from www . j av a 2 s . c om*/ * @param target * @return HashMap: License Types, each with a Collection of Licenses * @throws IOException */ protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException { HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>(); Document document; if (target.startsWith("http")) { document = Jsoup.connect(target).get(); } else { if (target.startsWith("file:")) target = target.substring(5); //File file = new File(target); InputStream in = RDCFossology.class.getResourceAsStream("res/" + target); //System.out.println("Fossology config file used: "+file.getPath()); //System.out.println("Fossology IS file used: "+in.toString()); document = Jsoup.parse(in, "UTF-8", "http://localhost"); } Elements licensesLinks = document.getElementsByAttribute("id"); for (Element element : licensesLinks) { String licenseName = element.child(0).text(); if (element.children().size() > 1) { String s = element.child(1).text(); Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*")); result.put(licenseName, licensesList); } } return result; }
From source file:com.geecko.QuickLyric.tasks.IdDecoder.java
@Override protected Lyrics doInBackground(String... strings) { String url = strings[0];//from www . j av a2s . c o m String artist; String track; if (url.contains("//www.soundhound.com/")) { try { // todo switch to Jsoup String html = getUrlAsString(url); int preceding = html.indexOf("root.App.trackDa") + 19; int following = html.substring(preceding).indexOf(";"); String data = html.substring(preceding, preceding + following); JSONObject jsonData = new JSONObject(data); artist = jsonData.getString("artist_display_name"); track = jsonData.getString("track_name"); } catch (IOException | JSONException e) { e.printStackTrace(); return new Lyrics(ERROR); } } else if (url.contains("//shz.am/")) { try { Document doc = Jsoup.connect(url.trim()).get(); track = doc.getElementsByAttribute("data-track-title").text(); artist = doc.getElementsByAttribute("data-track-artist").text(); } catch (IOException e) { e.printStackTrace(); return new Lyrics(ERROR); } } else if (url.contains("//play.google.com/store/music/")) { String docID = url.substring(url.indexOf("&tid=") + 5); try { Document doc = Jsoup.connect(url).get(); Element playCell = doc.getElementsByAttributeValue("data-track-docid", docID).get(0); artist = doc.getElementsByClass("primary").text(); track = playCell.parent().parent().child(1).getElementsByClass("title").text(); } catch (IOException e) { e.printStackTrace(); return new Lyrics(ERROR); } } else return new Lyrics(ERROR); Lyrics res = new Lyrics(Lyrics.SEARCH_ITEM); res.setArtist(artist); res.setTitle(track); return res; }
From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java
/**
 * Returns the URLs of the document templates of a scenario.
 *
 * <p>Downloads the scenario's template page and collects every {@code href} that ends
 * in {@code .docx}, {@code .xlsx} or {@code .pptx}, prefixed with the page URL.
 *
 * @param scenario the scenario name
 * @return the template URLs; empty if the page links no templates
 * @throws Exception general I/O error
 */
public String[] getTemplatesURL(String scenario) throws Exception {
    final String templatesPage = url + scenario_prefix + scenario + templates;

    HttpResponse response = httpClient.execute(new HttpGet(templatesPage));
    HttpEntity entity = response.getEntity();
    String pageHTML = EntityUtils.toString(entity);
    EntityUtils.consume(entity);

    ArrayList<String> templateUrls = new ArrayList<String>();
    for (Element link : Jsoup.parse(pageHTML).getElementsByAttribute("href")) {
        String href = link.attr("href");
        boolean isOfficeTemplate = href.endsWith(".docx") || href.endsWith(".xlsx") || href.endsWith(".pptx");
        if (isOfficeTemplate) {
            templateUrls.add(templatesPage + href);
        }
    }
    return templateUrls.toArray(new String[templateUrls.size()]);
}
From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java
/** * Liefert alle Szenarion URL's /*from w w w . j av a2s. c o m*/ * @return * @throws Exception Allgemeiner I/O Fehler */ public String[] getScenarios() throws Exception { ArrayList<String> s = new ArrayList<String>(); HttpGet get = new HttpGet(url + scenarios); try { HttpResponse response = httpClient.execute(get); HttpEntity entity = response.getEntity(); String pageHTML = EntityUtils.toString(entity); EntityUtils.consume(entity); Document document = Jsoup.parse(pageHTML); Elements elements = document.getElementsByAttribute("href"); for (Element e : elements) { if (e.attr("href").startsWith("/szenarien")) { String attr = e.attr("href").substring(scenario_prefix.length()); attr = attr.substring(0, attr.lastIndexOf('/')); s.add(attr); } } } catch (Exception e) { JOptionPane.showMessageDialog(null, "Keine Online Verbindung mglich. Bitte Szenario manuell downloaden, entpacken und bei XMl Model eintragen.", "Keine Verbindung zu http://www.hermes.admin.ch", JOptionPane.WARNING_MESSAGE); } return (s.toArray(new String[s.size()])); }
From source file:ru.neverdark.yotta.parser.YottaParser.java
private void parse(Array array) { final String URL = String.format("http://%s/hierarch.htm", array.getIp()); final StringBuffer result = new StringBuffer(); CredentialsProvider credsProvider = new BasicCredentialsProvider(); credsProvider.setCredentials(new AuthScope(array.getIp(), 80), new UsernamePasswordCredentials(array.getUser(), array.getPassword())); CloseableHttpClient httpClient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build(); try {// w w w . ja v a 2s .co m HttpGet httpget = new HttpGet(URL); CloseableHttpResponse response = httpClient.execute(httpget); System.err.printf("%s\t%s\n", array.getIp(), response.getStatusLine()); try { BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent())); String line = ""; while ((line = rd.readLine()) != null) { result.append(line); } Document doc = Jsoup.parse(result.toString()); Elements tables = doc.getElementsByAttribute("vspace"); // skip first for (int i = 1; i < tables.size(); i++) { parseTable(tables.get(i), array.getType()); } } finally { response.close(); } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { try { httpClient.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
From source file:com.iorga.iraj.servlet.AgglomeratorServlet.java
private void parseResource(final ServletConfig config, final String path) throws IOException, URISyntaxException { //TODO catch the modifications on the path itself final URL pathUrl = config.getServletContext().getResource(path); long lastModified = pathUrl.openConnection().getLastModified(); final InputStream targetIS = pathUrl.openStream(); final Document document = Jsoup.parse(targetIS, "UTF-8", ""); final Elements elements = document.getElementsByAttribute(ATTRIBUTE_NAME); for (final Element element : elements) { // each element which defines iraj-agglomerate // retrieve the suffix final String suffix = element.attr(ATTRIBUTE_NAME); final String urlAttribute = element.attr(URL_ATTRIBUTE_ATTRIBUTE_NAME); String src = StringUtils.removeEndIgnoreCase(element.attr(urlAttribute), suffix); String prefix = ""; if (!src.startsWith("/")) { // this is not an absolute file, let's add the prefix from the given path prefix = StringUtils.substringBeforeLast(path, "/") + "/"; src = prefix + src;//www .ja v a2s. c o m } // searching all scripts inside the folder defined by src attribute lastModified = searchAndAppendAfter(config, element, src, prefix, suffix, urlAttribute, lastModified); // finally remove it element.remove(); } caches.put(path, new ParsedResourceCacheEntry(path, document, lastModified)); }
From source file:com.thesmartweb.swebrank.WebParser.java
/** * Method to get the various html stats/*from w w w. j av a 2s . c o m*/ * @param link_html the url to analyze * @return flag if we got all the stats */ public boolean gethtmlstats(String link_html) { try { Document doc = Jsoup.connect(link_html).timeout(10 * 1000).get(); Elements schemas = doc.getElementsByAttributeValueContaining("itemtype", "schema.org"); Elements microdata = doc.getElementsByAttribute("itemtype"); Elements microformats_vcard = doc.getElementsByAttributeValueContaining("class", "vcard"); Elements microformats_hreview = doc.getElementsByAttributeValueContaining("class", "hreview"); Elements microformats_vevent = doc.getElementsByAttributeValueContaining("class", "vevent"); Elements microformats_vcalendar = doc.getElementsByAttributeValueContaining("class", "vcalendar"); Elements microformats_vgeo = doc.getElementsByAttributeValueContaining("class", "geo"); Elements microformats_vadrn = doc.getElementsByAttributeValueContaining("class", "ardn"); Elements microformats_acquaintance = doc.getElementsByAttributeValueContaining("rel", "link_html"); Elements microformats_alternate = doc.getElementsByAttributeValueContaining("rel", "alternate"); Elements microformats_appendix = doc.getElementsByAttributeValueContaining("rel", "appendix"); Elements microformats_bookmark = doc.getElementsByAttributeValueContaining("rel", "bookmark"); Elements microformats_chapter = doc.getElementsByAttributeValueContaining("rel", "chapter"); Elements microformats_child = doc.getElementsByAttributeValueContaining("rel", "child"); Elements microformats_coll = doc.getElementsByAttributeValueContaining("rel", "colleague"); Elements microformats_contact = doc.getElementsByAttributeValueContaining("rel", "contact"); Elements microformats_contents = doc.getElementsByAttributeValueContaining("rel", "contents"); Elements microformats_copyright = doc.getElementsByAttributeValueContaining("rel", "copyright"); Elements microformats_coresident = 
doc.getElementsByAttributeValueContaining("rel", "co-resident"); Elements microformats_coworker = doc.getElementsByAttributeValueContaining("rel", "co-worker"); Elements microformats_crush = doc.getElementsByAttributeValueContaining("rel", "crush"); Elements microformats_date = doc.getElementsByAttributeValueContaining("rel", "date"); Elements microformats_friend = doc.getElementsByAttributeValueContaining("rel", "friend"); Elements microformats_glossary = doc.getElementsByAttributeValueContaining("rel", "glossary"); Elements microformats_help = doc.getElementsByAttributeValueContaining("rel", "help"); Elements microformats_itsrules = doc.getElementsByAttributeValueContaining("rel", "its-rules"); Elements microformats_kin = doc.getElementsByAttributeValueContaining("rel", "kin"); Elements microformats_license = doc.getElementsByAttributeValueContaining("rel", "license"); Elements microformats_me = doc.getElementsByAttributeValueContaining("rel", "me"); Elements microformats_met = doc.getElementsByAttributeValueContaining("rel", "met"); Elements microformats_muse = doc.getElementsByAttributeValueContaining("rel", "muse"); Elements microformats_neighbor = doc.getElementsByAttributeValueContaining("rel", "neighbor"); Elements microformats_next = doc.getElementsByAttributeValueContaining("rel", "next"); Elements microformats_nofollow = doc.getElementsByAttributeValueContaining("rel", "nofollow"); Elements microformats_parent = doc.getElementsByAttributeValueContaining("rel", "parent"); Elements microformats_prev = doc.getElementsByAttributeValueContaining("rel", "prev"); Elements microformats_previous = doc.getElementsByAttributeValueContaining("rel", "previous"); Elements microformats_section = doc.getElementsByAttributeValueContaining("rel", "section"); Elements microformats_sibling = doc.getElementsByAttributeValueContaining("rel", "sibling"); Elements microformats_spouse = doc.getElementsByAttributeValueContaining("rel", "spouse"); Elements microformats_start = 
doc.getElementsByAttributeValueContaining("rel", "start"); Elements microformats_stylesheet = doc.getElementsByAttributeValueContaining("rel", "stylesheet"); Elements microformats_subsection = doc.getElementsByAttributeValueContaining("rel", "subsection"); Elements microformats_sweetheart = doc.getElementsByAttributeValueContaining("rel", "sweetheart"); Elements microformats_tag = doc.getElementsByAttributeValueContaining("rel", "tag"); Elements microformats_toc = doc.getElementsByAttributeValueContaining("rel", "toc"); Elements microformats_transformation = doc.getElementsByAttributeValueContaining("rel", "transformation"); Elements microformats_appleti = doc.getElementsByAttributeValueContaining("rel", "apple-touch-icon"); Elements microformats_appletip = doc.getElementsByAttributeValueContaining("rel", "apple-touch-icon-precomposed"); Elements microformats_appletsi = doc.getElementsByAttributeValueContaining("rel", "apple-touch-startup-image"); Elements microformats_attachment = doc.getElementsByAttributeValueContaining("rel", "attachment"); Elements microformats_can = doc.getElementsByAttributeValueContaining("rel", "canonical"); Elements microformats_categ = doc.getElementsByAttributeValueContaining("rel", "category"); Elements microformats_compon = doc.getElementsByAttributeValueContaining("rel", "component"); Elements microformats_chromewebi = doc.getElementsByAttributeValueContaining("rel", "chrome-webstore-item"); Elements microformats_disclosure = doc.getElementsByAttributeValueContaining("rel", "disclosure"); Elements microformats_discussion = doc.getElementsByAttributeValueContaining("rel", "discussion"); Elements microformats_dns = doc.getElementsByAttributeValueContaining("rel", "dns-prefetch"); Elements microformats_edit = doc.getElementsByAttributeValueContaining("rel", "edit"); Elements microformats_edituri = doc.getElementsByAttributeValueContaining("rel", "EditURI"); Elements microformats_entrycon = 
doc.getElementsByAttributeValueContaining("rel", "entry-content"); Elements microformats_external = doc.getElementsByAttributeValueContaining("rel", "external"); Elements microformats_home = doc.getElementsByAttributeValueContaining("rel", "home"); Elements microformats_hub = doc.getElementsByAttributeValueContaining("rel", "hub"); Elements microformats_inreplyto = doc.getElementsByAttributeValueContaining("rel", "in-reply-to"); Elements microformats_index = doc.getElementsByAttributeValueContaining("rel", "index"); Elements microformats_indieauth = doc.getElementsByAttributeValueContaining("rel", "indieauth"); Elements microformats_issues = doc.getElementsByAttributeValueContaining("rel", "issues"); Elements microformats_lightbox = doc.getElementsByAttributeValueContaining("rel", "lightbox"); Elements microformats_meta = doc.getElementsByAttributeValueContaining("rel", "meta"); Elements microformats_openid = doc.getElementsByAttributeValueContaining("rel", "opendid"); Elements microformats_p3pv1 = doc.getElementsByAttributeValueContaining("rel", "p3pv1"); Elements microformats_pgpkey = doc.getElementsByAttributeValueContaining("rel", "pgpkey"); Elements microformats_pingback = doc.getElementsByAttributeValueContaining("rel", "pingback"); Elements microformats_prerender = doc.getElementsByAttributeValueContaining("rel", "prerender"); Elements microformats_profile = doc.getElementsByAttributeValueContaining("rel", "profile"); Elements microformats_rendition = doc.getElementsByAttributeValueContaining("rel", "rendition"); Elements microformats_service = doc.getElementsByAttributeValueContaining("rel", "service"); Elements microformats_shortlink = doc.getElementsByAttributeValueContaining("rel", "shortlink"); Elements microformats_sidebar = doc.getElementsByAttributeValueContaining("rel", "sidebar"); Elements microformats_sitemap = doc.getElementsByAttributeValueContaining("rel", "sitemap"); Elements microformats_subresource = 
doc.getElementsByAttributeValueContaining("rel", "subresource"); Elements microformats_syndication = doc.getElementsByAttributeValueContaining("rel", "syndication"); Elements microformats_timesheet = doc.getElementsByAttributeValueContaining("rel", "timesheet"); Elements microformats_webmention = doc.getElementsByAttributeValueContaining("rel", "webmention"); Elements microformats_widget = doc.getElementsByAttributeValueContaining("rel", "widget"); Elements microformats_wlwmanifest = doc.getElementsByAttributeValueContaining("rel", "wlwmanifest"); Elements microformats_imgsrc = doc.getElementsByAttributeValueContaining("rel", "image_src"); Elements microformats_cmisacl = doc.getElementsByAttributeValueContaining("rel", "http://docs.oasis-open.org/ns/cmis/link/200908/acl"); Elements microformats_stylesheetless = doc.getElementsByAttributeValueContaining("rel", "stylesheet/less"); Elements microformats_accessibility = doc.getElementsByAttributeValueContaining("rel", "accessibility"); Elements microformats_biblio = doc.getElementsByAttributeValueContaining("rel", "bibliography"); Elements microformats_cite = doc.getElementsByAttributeValueContaining("rel", "cite"); Elements microformats_group = doc.getElementsByAttributeValueContaining("rel", "group"); Elements microformats_jslicence = doc.getElementsByAttributeValueContaining("rel", "jslicense"); Elements microformats_longdesc = doc.getElementsByAttributeValueContaining("rel", "longdesc"); Elements microformats_map = doc.getElementsByAttributeValueContaining("rel", "map"); Elements microformats_member = doc.getElementsByAttributeValueContaining("rel", "member"); Elements microformats_source = doc.getElementsByAttributeValueContaining("rel", "source"); Elements microformats_status = doc.getElementsByAttributeValueContaining("rel", "status"); Elements microformats_archive = doc.getElementsByAttributeValueContaining("rel", "archive"); Elements microformats_archives = doc.getElementsByAttributeValueContaining("rel", 
"archives"); Elements microformats_comment = doc.getElementsByAttributeValueContaining("rel", "comment"); Elements microformats_contribution = doc.getElementsByAttributeValueContaining("rel", "contribution"); Elements microformats_endorsed = doc.getElementsByAttributeValueContaining("rel", "endorsed"); Elements microformats_fan = doc.getElementsByAttributeValueContaining("rel", "fan"); Elements microformats_feed = doc.getElementsByAttributeValueContaining("rel", "feed"); Elements microformats_footnote = doc.getElementsByAttributeValueContaining("rel", "footnote"); Elements microformats_icon = doc.getElementsByAttributeValueContaining("rel", "icon"); Elements microformats_kinstyle = doc.getElementsByAttributeValueContaining("rel", "kinetic-stylesheet"); Elements microformats_prettyphoto = doc.getElementsByAttributeValueContaining("rel", "prettyPhoto"); Elements microformats_clearbox = doc.getElementsByAttributeValueContaining("rel", "clearbox"); Elements microformats_made = doc.getElementsByAttributeValueContaining("rel", "made"); Elements microformats_microsummary = doc.getElementsByAttributeValueContaining("rel", "microsummary"); Elements microformats_noreferrer = doc.getElementsByAttributeValueContaining("rel", "noreferrer"); Elements microformats_permalink = doc.getElementsByAttributeValueContaining("rel", "permalink"); Elements microformats_popover = doc.getElementsByAttributeValueContaining("rel", "popover"); Elements microformats_prefetch = doc.getElementsByAttributeValueContaining("rel", "prefetch"); Elements microformats_publickey = doc.getElementsByAttributeValueContaining("rel", "publickey"); Elements microformats_publisher = doc.getElementsByAttributeValueContaining("rel", "publisher"); Elements microformats_referral = doc.getElementsByAttributeValueContaining("rel", "referral"); Elements microformats_related = doc.getElementsByAttributeValueContaining("rel", "related"); Elements microformats_replies = doc.getElementsByAttributeValueContaining("rel", 
"replies"); Elements microformats_resource = doc.getElementsByAttributeValueContaining("rel", "resource"); Elements microformats_search = doc.getElementsByAttributeValueContaining("rel", "search"); Elements microformats_sponsor = doc.getElementsByAttributeValueContaining("rel", "sponsor"); Elements microformats_tooltip = doc.getElementsByAttributeValueContaining("rel", "tooltip"); Elements microformats_trackback = doc.getElementsByAttributeValueContaining("rel", "trackback"); Elements microformats_unendorsed = doc.getElementsByAttributeValueContaining("rel", "unendorsed"); Elements microformats_user = doc.getElementsByAttributeValueContaining("rel", "user"); Elements microformats_wlw = doc.getElementsByAttributeValueContaining("rel", "wlwmanifest"); //-----microformats2 Elements microformats2_hadr = doc.getElementsByAttributeValueContaining("class", "h-adr"); Elements microformats2_hcard = doc.getElementsByAttributeValueContaining("class", "h-card"); Elements microformats2_hentry = doc.getElementsByAttributeValueContaining("class", "h-entry"); Elements microformats2_hevent = doc.getElementsByAttributeValueContaining("class", "h-event"); Elements microformats2_hgeo = doc.getElementsByAttributeValueContaining("class", "h-geo"); Elements microformats2_hitem = doc.getElementsByAttributeValueContaining("class", "h-item"); Elements microformats2_hproduct = doc.getElementsByAttributeValueContaining("class", "h-product"); Elements microformats2_hrecipe = doc.getElementsByAttributeValueContaining("class", "h-recipe"); Elements microformats2_hresume = doc.getElementsByAttributeValueContaining("class", "h-resume"); Elements microformats2_hreview = doc.getElementsByAttributeValueContaining("class", "h-review"); Elements microformats2_hreviewagg = doc.getElementsByAttributeValueContaining("class", "h-review-aggregate"); Elements foaf_autodiscoveries = doc.getElementsByAttributeValueContaining("href", "foaf"); Elements foaf_types = 
doc.getElementsByAttributeValueContaining("type", "foaf"); Elements media = doc.select("embed"); Elements iframes = doc.select("iframe"); Elements script_el = doc.select("script"); Elements reltags = doc.select("link[rel]"); Elements reltags_a = doc.select("a[rel]"); number_embeded_videos = media.size(); scripts_number = script_el.size(); frames_number = iframes.size(); nschem = schemas.size(); hreln = reltags.size() + reltags_a.size(); foaf = foaf_autodiscoveries.size() + foaf_types.size(); micron1 = microformats_cmisacl.size() + microformats_vcard.size() + microformats_vevent.size() + microformats_hreview.size() + microformats_vgeo.size() + microformats_vcalendar.size() + microformats_vadrn.size() + microformats_acquaintance.size() + microformats_alternate.size() + microformats_appendix.size() + microformats_biblio.size() + microformats_bookmark.size() + microformats_chapter.size() + microformats_child.size() + microformats_coll.size() + microformats_contact.size() + microformats_contents.size() + microformats_copyright.size() + microformats_coresident.size() + microformats_coworker.size() + microformats_crush.size() + microformats_date.size() + microformats_friend.size() + microformats_glossary.size() + microformats_help.size() + microformats_itsrules.size() + microformats_kin.size() + microformats_license.size() + microformats_me.size() + microformats_met.size() + microformats_muse.size() + microformats_neighbor.size() + microformats_next.size() + microformats_nofollow.size() + microformats_parent.size() + microformats_prev.size() + microformats_previous.size() + microformats_section.size() + microformats_sibling.size() + microformats_spouse.size() + microformats_start.size() + microformats_stylesheet.size() + microformats_subsection.size() + microformats_sweetheart.size() + microformats_tag.size() + microformats_toc.size() + microformats_transformation.size() + microformats_appleti.size() + microformats_appletip.size() + microformats_appletsi.size() + 
microformats_attachment.size() + microformats_can.size() + microformats_categ.size() + microformats_compon.size() + microformats_chromewebi.size() + microformats_disclosure.size() + microformats_discussion.size() + microformats_dns.size() + microformats_edit.size() + microformats_edituri.size() + microformats_entrycon.size() + microformats_external.size() + microformats_home.size() + microformats_hub.size() + microformats_inreplyto.size() + microformats_index.size() + microformats_indieauth.size() + microformats_issues.size() + microformats_lightbox.size() + microformats_meta.size() + microformats_openid.size() + microformats_p3pv1.size() + microformats_pgpkey.size() + microformats_pingback.size() + microformats_prerender.size() + microformats_profile.size() + microformats_rendition.size() + microformats_service.size() + microformats_shortlink.size() + microformats_sidebar.size() + microformats_sitemap.size() + microformats_subresource.size() + microformats_syndication.size() + microformats_timesheet.size() + microformats_webmention.size() + microformats_widget.size() + microformats_wlwmanifest.size() + microformats_imgsrc.size() + microformats_imgsrc.size() + microformats_stylesheetless.size() + microformats_accessibility.size() + microformats_accessibility.size() + microformats_cite.size() + microformats_group.size() + microformats_jslicence.size() + microformats_longdesc.size() + microformats_map.size() + microformats_member.size() + microformats_source.size() + microformats_status.size() + microformats_archive.size() + microformats_archives.size() + microformats_comment.size() + microformats_contribution.size() + microformats_endorsed.size() + microformats_fan.size() + microformats_feed.size() + microformats_footnote.size() + microformats_icon.size() + microformats_kinstyle.size() + microformats_prettyphoto.size() + microformats_clearbox.size() + microformats_made.size() + microformats_microsummary.size() + microformats_noreferrer.size() + 
microformats_permalink.size() + microformats_popover.size() + microformats_prefetch.size() + microformats_publickey.size() + microformats_publisher.size() + microformats_referral.size() + microformats_related.size() + microformats_replies.size() + microformats_resource.size() + microformats_search.size() + microformats_sponsor.size() + microformats_tooltip.size() + microformats_trackback.size() + microformats_unendorsed.size() + microformats_user.size() + microformats_wlw.size() + foaf; micron2 = microformats2_hadr.size() + microformats2_hcard.size() + microformats2_hentry.size() + microformats2_hevent.size() + microformats2_hgeo.size() + microformats2_hitem.size() + microformats2_hproduct.size() + microformats2_hrecipe.size() + microformats2_hresume.size() + microformats2_hreview.size() + microformats2_hreviewagg.size(); total_micron = micron1 + micron2; microd = microdata.size(); return true; } catch (IOException | IllegalCharsetNameException ex) { Logger.getLogger(com.thesmartweb.swebrank.WebParser.class.getName()).log(Level.SEVERE, null, ex); return false; } }
From source file:no.kantega.publishing.admin.content.htmlfilter.ContextPathFilter.java
@Override public Document runFilter(Document document) { for (String attribute : attributes) { Elements withHref = document.getElementsByAttribute(attribute); fixContextPathForAttribute(withHref, attribute); }// w w w .j ava 2 s . c o m return document; }