Example usage for org.jsoup.nodes Attribute getValue

List of usage examples for org.jsoup.nodes Attribute getValue

Introduction

On this page you can find example usages of org.jsoup.nodes.Attribute.getValue().

Prototype

public String getValue() 

Source Link

Document

Get the attribute value.

Usage

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Builds the message parameter map from the attributes of an element.
 *
 * <p>Attributes whose name is contained in {@code EXCLUDE_ATTR_NAME_LIST} are skipped. For the
 * remaining attributes:
 * <ul>
 * <li>a name starting with {@code @} is stored without the prefix, and its value is used as a
 * message key;</li>
 * <li>a name starting with {@code #} is stored without the prefix, and its value is used as a
 * sub key of {@code key} (or as the key itself when {@code key} is empty);</li>
 * <li>any other attribute is stored as a plain name/value pair.</li>
 * </ul>
 * Prefixed attributes are stored as objects whose {@code toString()} looks the message up
 * through the configured i18n helper at the time it is called.
 *
 * @param attributes the attributes to convert
 * @param locale the locale passed to the message helper
 * @param key the current message key, used as parent for {@code #} prefixed attributes
 * @return the parameter map
 */
private static Map<String, Object> getMessageParams(final Attributes attributes, final Locale locale,
        final String key) {
    final List<String> ignoredNames = EXCLUDE_ATTR_NAME_LIST;
    final Map<String, Object> paramMap = new HashMap<>();

    for (Attribute attribute : attributes) {
        final String rawName = attribute.getKey();
        if (ignoredNames.contains(rawName)) {
            continue;
        }
        final String value = attribute.getValue();

        final boolean absoluteRef = rawName.startsWith("@");
        final boolean relativeRef = !absoluteRef && rawName.startsWith("#");
        final String paramName = (absoluteRef || relativeRef) ? rawName.substring(1) : rawName;

        final String recursiveKey;
        if (absoluteRef) {
            recursiveKey = value;
        } else if (relativeRef) {
            // the value of a # prefixed attribute is a sub key of the current key
            recursiveKey = StringUtils.isEmpty(key) ? value : key + "." + value;
        } else {
            recursiveKey = null;
        }

        if (recursiveKey == null) {
            paramMap.put(paramName, value);
            continue;
        }

        paramMap.put(paramName, new Object() {
            @Override
            public String toString() {
                switch (I18nMessageHelperTypeAssistant.configuredHelperType()) {
                case Mapped:
                    // the mapped helper accepts the parameter map, so it is passed on recursively
                    return I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessage(locale,
                            recursiveKey, paramMap);
                case Ordered:
                default:
                    return I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessage(locale,
                            recursiveKey);
                }
            }
        });
    }
    return paramMap;
}

From source file:io.knotx.knot.service.service.ServiceEntry.java

/**
 * Converts the value of the params attribute into a {@link JsonObject}.
 *
 * @param paramsAttribute the attribute holding the JSON text, may be {@code null}
 * @return the object built from the attribute value, or an empty object when the attribute is
 *         {@code null} or its value is empty
 */
private JsonObject getParams(Attribute paramsAttribute) {
    if (paramsAttribute != null) {
        final String rawJson = paramsAttribute.getValue();
        if (!StringUtils.isEmpty(rawJson)) {
            return new JsonObject(rawJson);
        }
    }
    return new JsonObject();
}

From source file:io.knotx.knot.service.service.ServiceEntry.java

public ServiceEntry(Attribute serviceAttribute, Attribute paramsAttribute) {
    this.namespace = ServiceAttributeUtil.extractNamespace(serviceAttribute.getKey());
    this.name = serviceAttribute.getValue();
    this.params = getParams(paramsAttribute);
    this.cacheKey = String.format("%s|%s", getName(), getParams());
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads the event listing for a category and calls {@link #getEvent(String, String)} for
 * every event id found in the listing's click-tracker links.
 *
 * <p>The id is cut out of each attribute value containing {@code clck.yandex.ru} at fixed
 * character offsets. Values too short to hold an id at those offsets are skipped instead of
 * raising a {@link StringIndexOutOfBoundsException}.
 *
 * @param type the category passed as the {@code category} query parameter
 */
public void getCategorizedEvents(String type) {
    // Fixed offsets of the event id inside a click-tracker URL.
    final int idStart = 90;
    final int shortIdEnd = 96;
    final int longIdEnd = 97;

    try {
        doc = Jsoup.connect("https://afisha.yandex.ru/msk/events/?category=" + type + "&limit=1000").get();

        Elements links = doc.select("a[href]");
        for (Element link : links) {
            for (Attribute attr : link.attributes().asList()) {
                final String value = attr.getValue();
                if (!value.contains("clck.yandex.ru") || value.length() < shortIdEnd) {
                    continue;
                }
                // A '/' at the long end offset means the id occupies one more character.
                final boolean longId = value.length() > longIdEnd && value.charAt(longIdEnd) == '/';
                getEvent(value.substring(idStart, longId ? longIdEnd : shortIdEnd), type);
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads the page of a single event, fills an {@code Event} from it and hands the result to
 * {@code geoCode} and {@code formJson}.
 *
 * <p>Description, name, place, date and time are read from the page markup. Markup that does
 * not have the expected nesting leaves the corresponding field unset instead of throwing.
 *
 * @param eventId the event id used to build the page URL
 * @param evType the category key looked up in {@code eventTypes}
 */
public void getEvent(String eventId, String evType) {
    try {
        Document dc = Jsoup.connect("https://afisha.yandex.ru/msk/events/" + eventId + "/").get();

        Event eb = new Event();
        eb.setEventID(eventId);
        eb.setCategory(eventTypes.get(evType));

        for (Element meta : dc.select("meta")) {
            if (meta.attributes().get("property").contains("og:description")) {
                eb.setDescription(meta.attributes().get("content"));
            }
        }

        for (Element title : dc.select("title")) {
            final String titleHtml = title.html();
            // NOTE(review): indexOf("") always returns 0, so the name is always set to an empty
            // string. The separator literal appears to have been lost; confirm the intended
            // delimiter before changing this.
            eb.setName(titleHtml.substring(0, titleHtml.indexOf("")));
        }

        for (Element link : dc.select("a[href]")) {
            for (Attribute attr : link.attributes().asList()) {
                if (attr.getValue().contains("/msk/places/")) {
                    eb.setPlace(getEventPlaces(attr.getValue()));
                }
            }
        }

        for (Element row : dc.select("tr[id]")) {
            for (Attribute attr : row.attributes().asList()) {
                if (!attr.getValue().contains("f")) {
                    continue;
                }

                Element dateCell = row.children().first();
                if (dateCell != null) {
                    eb.setDate(dateCell.html());
                }

                // The time is nested four levels deep in the second cell, or in the third cell
                // when the second one does not have that structure.
                String time = nestedCellHtml(row, 1);
                if (time == null) {
                    time = nestedCellHtml(row, 2);
                }
                if (time != null) {
                    eb.setTime(time);
                }
            }
        }

        geoCode(eb);
        formJson(eb);

    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Returns the HTML of the element reached by following the first child four times below the
 * given cell of a row, or {@code null} when the cell or any level of the chain is missing.
 */
private static String nestedCellHtml(Element row, int cellIndex) {
    Elements cells = row.children();
    if (cellIndex >= cells.size()) {
        return null;
    }
    Element current = cells.get(cellIndex);
    for (int depth = 0; depth < 4 && current != null; depth++) {
        current = current.children().first();
    }
    return current == null ? null : current.html();
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Finds the first attribute of a node whose value contains {@code "://"}.
 *
 * @param node the node whose attributes are scanned
 * @return the value of the first matching attribute, or {@code null} if none matches
 */
public static String urlFromAttr(Node node) {
    String found = null;
    for (Attribute candidate : node.attributes().asList()) {
        final String text = candidate.getValue();
        if (text.contains("://")) {
            found = text;
            break;
        }
    }
    return found;
}

From source file:com.kingfong.webcrawler.util.DOMContentUtils.java

/**
 * Parses the supplied HTML and adds the link targets it contains to {@code outlinks}.
 *
 * <p>Every element whose lower-cased tag name has an entry in {@code linkParams} is inspected;
 * the value of the attribute named by that entry is taken as the link target. Targets are
 * resolved against {@code url} as follows:
 * <ul>
 * <li>a protocol-relative target ({@code //host/path}) gets the protocol of {@code url};</li>
 * <li>a root-relative target ({@code /path}) gets the protocol, host and, when one is set
 * explicitly, the port of {@code url};</li>
 * <li>any other target is kept as written.</li>
 * </ul>
 * A target is added only when {@code URLFilter.filt} accepts it.
 *
 * @param html the markup to scan
 * @param url the URL the markup was loaded from, used to resolve relative targets
 * @param outlinks the set that receives the accepted targets
 */
public void getOutlinks(String html, URL url, HashSet<String> outlinks) {
    Document document = Jsoup.parse(html);
    for (Element currentNode : document.getAllElements()) {
        LinkParams params = linkParams.get(currentNode.tagName().toLowerCase());
        if (params == null) {
            continue;
        }

        String target = null;
        for (Attribute attr : currentNode.attributes()) {
            if (params.attrName.equalsIgnoreCase(attr.getKey())) {
                target = attr.getValue();
            }
        }
        if (target == null) {
            continue;
        }

        if (target.startsWith("//")) {
            // protocol-relative: only the scheme is missing
            target = url.getProtocol() + ":" + target;
        } else if (target.startsWith("/")) {
            // root-relative: keep an explicit port so the link stays on the same server
            final int port = url.getPort();
            target = url.getProtocol() + "://" + url.getHost() + (port == -1 ? "" : ":" + port) + target;
        }

        if (URLFilter.filt(target)) {
            outlinks.add(target);
        }
    }
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * pulls out videos we like/*  w  ww . ja  v  a2s . c o  m*/
 *
 * @return
 */
private ArrayList<Element> extractVideos(Element node) {
    ArrayList<Element> candidates = new ArrayList<Element>();
    ArrayList<Element> goodMovies = new ArrayList<Element>();
    try {

        Elements embeds = node.parent().getElementsByTag("embed");
        for (Element el : embeds) {
            candidates.add(el);
        }
        Elements objects = node.parent().getElementsByTag("object");
        for (Element el : objects) {
            candidates.add(el);
        }
        if (logger.isDebugEnabled()) {
            logger.debug("extractVideos: Starting to extract videos. Found: " + candidates.size());
        }

        for (Element el : candidates) {

            Attributes attrs = el.attributes();

            for (Attribute a : attrs) {
                try {
                    if (logger.isDebugEnabled()) {
                        logger.debug(a.getKey() + " : " + a.getValue());
                    }
                    if ((a.getValue().contains("youtube") || a.getValue().contains("vimeo"))
                            && a.getKey().equals("src")) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Found video... setting");
                            logger.debug("This page has a video!: " + a.getValue());
                        }
                        goodMovies.add(el);

                    }
                } catch (Exception e) {
                    logger.error(e.toString());
                    e.printStackTrace();
                }
            }

        }
    } catch (NullPointerException e) {
        logger.error(e.toString(), e);
    } catch (Exception e) {
        logger.error(e.toString(), e);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("extractVideos:  done looking videos");
    }
    return goodMovies;
}

From source file:no.kantega.publishing.modules.linkcheck.crawl.LinkExtractor.java

/**
 * Reports every link found in a content attribute to the given link handler.
 *
 * <p>The handling depends on the attribute type:
 * <ul>
 * <li>{@code HtmltextAttribute}: the value is parsed as HTML and the {@code href} of every
 * {@code a[href]} element is reported;</li>
 * <li>{@code UrlAttribute}: the non-empty value is reported, prefixed with
 * {@code Aksess.VAR_WEB} when it starts with {@code /};</li>
 * <li>{@code FileAttribute} / {@code MediaAttribute}: the value is parsed as an integer id and
 * an attachment or multimedia link is reported;</li>
 * <li>{@code RepeaterAttribute}: every nested attribute is handled recursively.</li>
 * </ul>
 * Failures are logged together with their cause and do not propagate to the caller.
 *
 * @param content the content the attribute belongs to
 * @param linkHandler receives the links that are found
 * @param attribute the attribute to inspect
 */
private void handleAttribute(Content content, LinkHandler linkHandler, Attribute attribute) {
    // the title is preferred as display name, the technical name is the fallback
    String attrName = (isNotBlank(attribute.getTitle())) ? attribute.getTitle() : attribute.getName();
    if (attribute instanceof HtmltextAttribute) {
        String html = attribute.getValue();
        try {
            if (html != null) {
                Elements links = Jsoup.parse(html).select("a[href]");
                for (Element link : links) {
                    String href = link.attr("href");
                    linkHandler.attributeLinkFound(content, href, attrName);
                }
            }
        } catch (Throwable e) {
            // NOTE(review): Throwable also covers Errors; confirm that swallowing them is intended.
            eventLog.log("LinkExtractor", "localhost", Event.FAILED_LINK_EXTRACT,
                    String.format("Failed to extract links from %s", content.getUrl()), content);
            // the throwable is passed last so the logger records the cause and stack trace
            log.error("contentId: {}, associationid: {}, attribute: {} {}", content.getId(),
                    content.getAssociation().getId(), attrName, html, e);
        }
    } else if (attribute instanceof UrlAttribute) {
        String link = attribute.getValue();
        if (link != null && link.length() > 0) {
            if (link.startsWith("/")) {
                link = Aksess.VAR_WEB + link;
            }
            linkHandler.attributeLinkFound(content, link, attrName);
        }
    } else if (attribute instanceof FileAttribute && isNotBlank(attribute.getValue())) {
        try {
            int attachmentId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/attachment.ap?id=" + attachmentId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) FileAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof MediaAttribute && isNotBlank(attribute.getValue())) {
        try {
            int mediaId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/multimedia.ap?id=" + mediaId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) MediaAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof RepeaterAttribute) {
        RepeaterAttribute repeaterAttribute = (RepeaterAttribute) attribute;
        for (List<Attribute> attributes : repeaterAttribute) {
            for (Attribute a : attributes) {
                handleAttribute(content, linkHandler, a);
            }
        }
    }
}