List of usage examples for org.jsoup.nodes.Attribute#getValue()
public String getValue()
From source file:com.astamuse.asta4d.render.RenderUtil.java
private static Map<String, Object> getMessageParams(final Attributes attributes, final Locale locale, final String key) { List<String> excludeAttrNameList = EXCLUDE_ATTR_NAME_LIST; final Map<String, Object> paramMap = new HashMap<>(); for (Attribute attribute : attributes) { String attrKey = attribute.getKey(); if (excludeAttrNameList.contains(attrKey)) { continue; }/* w w w .jav a 2 s .co m*/ String value = attribute.getValue(); final String recursiveKey; if (attrKey.startsWith("@")) { attrKey = attrKey.substring(1); recursiveKey = value; } else if (attrKey.startsWith("#")) { attrKey = attrKey.substring(1); // we treat the # prefixed attribute value as a sub key of current key if (StringUtils.isEmpty(key)) { recursiveKey = value; } else { recursiveKey = key + "." + value; } } else { recursiveKey = null; } if (recursiveKey == null) { paramMap.put(attrKey, value); } else { paramMap.put(attrKey, new Object() { @Override public String toString() { switch (I18nMessageHelperTypeAssistant.configuredHelperType()) { case Mapped: // for the mapped helper, we can pass the parameter map recursively return I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessage(locale, recursiveKey, paramMap); case Ordered: default: return I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessage(locale, recursiveKey); } } }); } } return paramMap; }
From source file:io.knotx.knot.service.service.ServiceEntry.java
/**
 * Parses the value of the params attribute into a {@code JsonObject}.
 *
 * @param paramsAttribute the attribute holding the JSON text; may be {@code null}
 * @return the parsed object, or an empty {@code JsonObject} when the attribute is {@code null} or
 *         its value is empty
 */
private JsonObject getParams(Attribute paramsAttribute) {
    if (paramsAttribute != null) {
        final String rawJson = paramsAttribute.getValue();
        if (!StringUtils.isEmpty(rawJson)) {
            return new JsonObject(rawJson);
        }
    }
    return new JsonObject();
}
From source file:io.knotx.knot.service.service.ServiceEntry.java
public ServiceEntry(Attribute serviceAttribute, Attribute paramsAttribute) { this.namespace = ServiceAttributeUtil.extractNamespace(serviceAttribute.getKey()); this.name = serviceAttribute.getValue(); this.params = getParams(paramsAttribute); this.cacheKey = String.format("%s|%s", getName(), getParams()); }
From source file:org.abondar.experimental.eventsearch.EventFinder.java
public void getCategorizedEvents(String type) { try {//from w ww . j ava 2 s . c o m doc = Jsoup.connect("https://afisha.yandex.ru/msk/events/?category=" + type + "&limit=1000").get(); Elements els = doc.select("a[href]"); for (Element e : els) { for (Attribute attr : e.attributes().asList()) { if (attr.getValue().contains("clck.yandex.ru")) { if (attr.getValue().charAt(97) != '/') { getEvent(attr.getValue().substring(90, 96), type); } else { getEvent(attr.getValue().substring(90, 97), type); } } } } } catch (IOException ex) { Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:org.abondar.experimental.eventsearch.EventFinder.java
/**
 * Fetches the page of a single event, fills an {@code Event} from it and passes the result to
 * {@code geoCode} and {@code formJson}.
 *
 * <p>Read from the page:
 * <ul>
 * <li>description: the {@code content} of {@code meta} elements whose {@code property} contains
 * {@code og:description};</li>
 * <li>name: taken from the {@code title} element (see the review note in the body);</li>
 * <li>place: from links whose attribute value contains {@code /msk/places/};</li>
 * <li>date and time: from {@code tr[id]} rows that have an attribute value containing
 * {@code "f"}.</li>
 * </ul>
 * The time is looked up under the row's second cell and, when nothing is found there, under its
 * third cell. If neither cell has the expected nesting the time is left unset.
 *
 * <p>An {@code IOException} while fetching the page is logged and not rethrown.
 *
 * @param eventId the event id, inserted into the event URL
 * @param evType  the category key, looked up in {@code eventTypes}
 */
public void getEvent(String eventId, String evType) {
    try {
        Document dc = Jsoup.connect("https://afisha.yandex.ru/msk/events/" + eventId + "/").get();
        Event eb = new Event();
        eb.setEventID(eventId);
        eb.setCategory(eventTypes.get(evType));

        for (Element meta : dc.select("meta")) {
            if (meta.attributes().get("property").contains("og:description")) {
                eb.setDescription(meta.attributes().get("content"));
            }
        }

        for (Element title : dc.select("title")) {
            // NOTE(review): indexOf("") is always 0, so this always sets an empty name. The
            // delimiter literal was presumably lost from this source; restore it once confirmed.
            eb.setName(title.html().substring(0, title.html().indexOf("")));
        }

        for (Element link : dc.select("a[href]")) {
            for (Attribute attr : link.attributes().asList()) {
                if (attr.getValue().contains("/msk/places/")) {
                    eb.setPlace(getEventPlaces(attr.getValue()));
                }
            }
        }

        for (Element row : dc.select("tr[id]")) {
            for (Attribute attr : row.attributes().asList()) {
                if (attr.getValue().contains("f")) {
                    eb.setDate(row.children().first().html());
                    // Explicit null checks replace the former catch of NullPointerException.
                    Element timeNode = nestedFirstChild(row, 1);
                    if (timeNode == null) {
                        timeNode = nestedFirstChild(row, 2);
                    }
                    if (timeNode != null) {
                        eb.setTime(timeNode.html());
                    }
                }
            }
        }

        geoCode(eb);
        formJson(eb);
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Returns the element reached by taking the first child four times, starting from the child of
 * {@code row} at {@code cellIndex}; {@code null} if that child or any level below it is missing.
 */
private static Element nestedFirstChild(Element row, int cellIndex) {
    Elements cells = row.children();
    if (cellIndex >= cells.size()) {
        return null;
    }
    Element current = cells.get(cellIndex);
    for (int depth = 0; depth < 4 && current != null; depth++) {
        current = current.children().first();
    }
    return current;
}
From source file:com.screenslicer.core.util.Util.java
/**
 * Returns the value of the first attribute of {@code node} that contains {@code "://"}.
 *
 * @param node the node whose attributes are scanned in list order
 * @return the first matching attribute value, or {@code null} when no attribute matches
 */
public static String urlFromAttr(Node node) {
    String found = null;
    for (Attribute candidate : node.attributes().asList()) {
        final String text = candidate.getValue();
        if (text.contains("://")) {
            found = text;
            break;
        }
    }
    return found;
}
From source file:com.kingfong.webcrawler.util.DOMContentUtils.java
/** * This method finds all anchors below the supplied DOM * <code>node</code>, and creates appropriate {@link Outlink} * records for each (relative to the supplied <code>base</code> * URL), and adds them to the <code>outlinks</code> {@link * ArrayList}.// ww w .j a v a 2s .c o m * * <p> * * Links without inner structure (tags, text, etc) are discarded, as * are links which contain only single nested links and empty text * nodes (this is a common DOM-fixup artifact, at least with * nekohtml). */ public void getOutlinks(String html, URL url, HashSet<String> outlinks) { Document document = Jsoup.parse(html); Elements elements = document.getAllElements(); for (Element currentNode : elements) { String nodeName = currentNode.tagName(); // short nodeType = currentNode.; Elements children = currentNode.children(); nodeName = nodeName.toLowerCase(); LinkParams params = linkParams.get(nodeName); if (params != null) { // if (!shouldThrowAwayLink(currentNode, children, childLen, // params)) { // StringBuilder linkText = new StringBuilder(); // getText(linkText, currentNode, true); Attributes attrs = currentNode.attributes(); String target = null; boolean noFollow = false; boolean post = false; Iterator<Attribute> iterator = attrs.iterator(); while (iterator.hasNext()) { Attribute attr = iterator.next(); String attrName = attr.getKey(); if (params.attrName.equalsIgnoreCase(attrName)) { target = attr.getValue(); } else if ("rel".equalsIgnoreCase(attrName) && "nofollow".equalsIgnoreCase(attr.getValue())) { noFollow = true; } else if ("method".equalsIgnoreCase(attrName) && "post".equalsIgnoreCase(attr.getValue())) { post = true; } } if (StringUtils.startsWith(target, "/")) { target = url.getProtocol() + "://" + url.getHost() + target; } if (target != null && URLFilter.filt(target)) { outlinks.add(target); } // } // this should not have any children, skip them if (params.childLen == 0) continue; } } }
From source file:com.jimplush.goose.ContentExtractor.java
/** * pulls out videos we like/* w ww . ja v a2s . c o m*/ * * @return */ private ArrayList<Element> extractVideos(Element node) { ArrayList<Element> candidates = new ArrayList<Element>(); ArrayList<Element> goodMovies = new ArrayList<Element>(); try { Elements embeds = node.parent().getElementsByTag("embed"); for (Element el : embeds) { candidates.add(el); } Elements objects = node.parent().getElementsByTag("object"); for (Element el : objects) { candidates.add(el); } if (logger.isDebugEnabled()) { logger.debug("extractVideos: Starting to extract videos. Found: " + candidates.size()); } for (Element el : candidates) { Attributes attrs = el.attributes(); for (Attribute a : attrs) { try { if (logger.isDebugEnabled()) { logger.debug(a.getKey() + " : " + a.getValue()); } if ((a.getValue().contains("youtube") || a.getValue().contains("vimeo")) && a.getKey().equals("src")) { if (logger.isDebugEnabled()) { logger.debug("Found video... setting"); logger.debug("This page has a video!: " + a.getValue()); } goodMovies.add(el); } } catch (Exception e) { logger.error(e.toString()); e.printStackTrace(); } } } } catch (NullPointerException e) { logger.error(e.toString(), e); } catch (Exception e) { logger.error(e.toString(), e); } if (logger.isDebugEnabled()) { logger.debug("extractVideos: done looking videos"); } return goodMovies; }
From source file:no.kantega.publishing.modules.linkcheck.crawl.LinkExtractor.java
/**
 * Reports the links held by a content attribute to {@code linkHandler}.
 *
 * <p>The attribute is reported under its title when that is not blank, otherwise under its name.
 * Handling depends on the attribute type:
 * <ul>
 * <li>{@code HtmltextAttribute}: the value is parsed as HTML and the {@code href} of every
 * {@code a[href]} element is reported;</li>
 * <li>{@code UrlAttribute}: a non-empty value is reported, prefixed with {@code Aksess.VAR_WEB}
 * when it starts with {@code /};</li>
 * <li>{@code FileAttribute} / {@code MediaAttribute}: a non-blank value is parsed as an integer
 * id and reported as an attachment / multimedia link;</li>
 * <li>{@code RepeaterAttribute}: every contained attribute is handled recursively.</li>
 * </ul>
 * Failures in the HTML, file and media branches are logged together with their cause and are not
 * rethrown.
 *
 * @param content     the content that owns the attribute
 * @param linkHandler receives each link that is found
 * @param attribute   the attribute to inspect
 */
private void handleAttribute(Content content, LinkHandler linkHandler, Attribute attribute) {
    String attrName = (isNotBlank(attribute.getTitle())) ? attribute.getTitle() : attribute.getName();
    if (attribute instanceof HtmltextAttribute) {
        String html = attribute.getValue();
        try {
            if (html != null) {
                Elements links = Jsoup.parse(html).select("a[href]");
                for (Element link : links) {
                    String href = link.attr("href");
                    linkHandler.attributeLinkFound(content, href, attrName);
                }
            }
        } catch (Throwable e) {
            // Throwable is kept on purpose so that a failure on one attribute stays best-effort.
            eventLog.log("LinkExtractor", "localhost", Event.FAILED_LINK_EXTRACT,
                    String.format("Failed to extract links from %s", content.getUrl()), content);
            // The trailing throwable is logged as the cause, not bound to a placeholder.
            log.error("contentId: {}, associationid: {}, attribute: {} {}", content.getId(),
                    content.getAssociation().getId(), attrName, html, e);
        }
    } else if (attribute instanceof UrlAttribute) {
        String link = attribute.getValue();
        if (link != null && link.length() > 0) {
            if (link.startsWith("/")) {
                link = Aksess.VAR_WEB + link;
            }
            linkHandler.attributeLinkFound(content, link, attrName);
        }
    } else if (attribute instanceof FileAttribute && isNotBlank(attribute.getValue())) {
        try {
            int attachmentId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/attachment.ap?id=" + attachmentId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) FileAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof MediaAttribute && isNotBlank(attribute.getValue())) {
        try {
            int mediaId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/multimedia.ap?id=" + mediaId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) MediaAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof RepeaterAttribute) {
        RepeaterAttribute repeaterAttribute = (RepeaterAttribute) attribute;
        for (List<Attribute> attributes : repeaterAttribute) {
            for (Attribute a : attributes) {
                handleAttribute(content, linkHandler, a);
            }
        }
    }
}