Example usage for com.google.gwt.dom.client AnchorElement getHref

List of usage examples for com.google.gwt.dom.client AnchorElement getHref

Introduction

On this page you can find example usages of the getHref() method of com.google.gwt.dom.client.AnchorElement.

Prototype

public String getHref() 

Source Link

Document

The absolute URI of the linked resource.

Usage

From source file:ch.unifr.pai.twice.widgets.mpproxy.client.ProxyBody.java

License:Apache License

/**
 * Client side logic to rewrite a given URL
 * /*  w w w  . java2  s. com*/
 * @param element
 * @param servletPath
 * @param proxyPath
 */
public static void rewriteUrl(com.google.gwt.dom.client.Element element, String servletPath, String proxyPath) {
    NodeList<Node> nodes = element.getChildNodes();
    for (int i = 0; i < nodes.getLength(); i++) {
        Node n = nodes.getItem(i);
        if (com.google.gwt.dom.client.Element.is(n)) {
            com.google.gwt.dom.client.Element e = com.google.gwt.dom.client.Element.as(n);
            if (e != null && e.getTagName() != null && e.getTagName().equalsIgnoreCase("a")) {
                AnchorElement anchor = AnchorElement.as(e);
                if (anchor.getHref() != null && !anchor.getHref().isEmpty())
                    anchor.removeAttribute("onmousedown");
            }
            for (String att : attributesToManipulate) {
                String value = e.getAttribute(att);
                if (value != null && !value.startsWith(servletPath) && value.matches("((http)|/).*")) {
                    String transformed = Rewriter.translateCleanUrl(value, servletPath, proxyPath);
                    if (!transformed.equals(value))
                        e.setAttribute(att, transformed);
                }
            }
            rewriteUrl(e, servletPath, proxyPath);
        }
    }
}

From source file:com.dom_distiller.client.ContentExtractor.java

License:Open Source License

/**
 * Makes the href of all anchors and the poster of all videos in the subtree rooted at
 * {@code rootNode} absolute, then calls {@code makeAllSrcAttributesAbsolute} on the root.
 *
 * <p>The root element itself is handled too: {@code getElementsByTagName} only returns
 * descendants, so a root that is itself an anchor or a video would otherwise be skipped.
 *
 * @param rootNode The root node to look through; it is converted with {@code Element.as}.
 */
private static void makeAllLinksAbsolute(Node rootNode) {
    Element root = Element.as(rootNode);

    // AnchorElement.getHref() and ImageElement.getSrc() both return the
    // absolute URI, so simply set them as the respective attributes.

    if ("A".equalsIgnoreCase(root.getTagName())) {
        AnchorElement link = AnchorElement.as(root);
        if (!link.getHref().isEmpty()) {
            link.setHref(link.getHref());
        }
    }
    NodeList<Element> allLinks = root.getElementsByTagName("A");
    for (int i = 0; i < allLinks.getLength(); i++) {
        AnchorElement link = AnchorElement.as(allLinks.getItem(i));
        if (!link.getHref().isEmpty()) {
            link.setHref(link.getHref());
        }
    }

    if ("VIDEO".equalsIgnoreCase(root.getTagName())) {
        VideoElement video = (VideoElement) root;
        if (!video.getPoster().isEmpty()) {
            video.setPoster(video.getPoster());
        }
    }
    NodeList<Element> videoTags = root.getElementsByTagName("VIDEO");
    for (int i = 0; i < videoTags.getLength(); i++) {
        VideoElement video = (VideoElement) videoTags.getItem(i);
        if (!video.getPoster().isEmpty()) {
            video.setPoster(video.getPoster());
        }
    }
    makeAllSrcAttributesAbsolute(root);
}

From source file:com.dom_distiller.client.PagingLinksFinder.java

License:Open Source License

/**
 * Scores every anchor below {@code root} as a candidate next- or previous-page link and
 * returns the href of the best candidate.
 *
 * <p>Links that have zero size, are invisible, are empty or not http/https, equal the current
 * window location, are on a different host, or whose text looks extraneous are skipped. The
 * remaining links are keyed by href and accumulate a score from a series of heuristics (link
 * text, class name, id, parent class names/ids, URL shape, numeric link text).
 *
 * @param root element whose descendant anchors are examined
 * @param original_domain passed to {@code findBaseUrl} and {@code getLocationHost} to derive
 *            the base URL and the host that candidate links must match
 * @param pageLink whether a next ({@code PageLink.NEXT}) or previous page link is wanted
 * @return the href of the highest-scoring candidate with a score of at least 50, with any
 *         trailing '/' removed, or {@code null} if no candidate qualifies
 */
private static String findPagingLink(Element root, String original_domain, PageLink pageLink) {
    // findPagingLink() is static, so clear mLinkDebugInfo before processing the links.
    if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_PAGING_INFO)) {
        mLinkDebugInfo.clear();
    }

    String baseUrl = findBaseUrl(original_domain);
    // Remove trailing '/' from window location href, because it'll be used to compare with
    // other href's whose trailing '/' are also removed.
    String wndLocationHref = StringUtil.findAndReplace(Window.Location.getHref(), "\\/$", "");
    NodeList<Element> allLinks = root.getElementsByTagName("A");
    Map<String, PagingLinkObj> possiblePages = new HashMap<String, PagingLinkObj>();

    // Loop through all links, looking for hints that they may be next- or previous- page links.
    // Things like having "page" in their textContent, className or id, or being a child of a
    // node with a page-y className or id.
    // Also possible: levenshtein distance? longest common subsequence?
    // After we do that, assign each page a score.
    for (int i = 0; i < allLinks.getLength(); i++) {
        AnchorElement link = AnchorElement.as(allLinks.getItem(i));

        int width = link.getOffsetWidth();
        int height = link.getOffsetHeight();
        if (width == 0 || height == 0) {
            appendDbgStrForLink(link, "ignored: sz=" + width + "x" + height);
            continue;
        }

        if (!DomUtil.isVisible(link)) {
            appendDbgStrForLink(link, "ignored: invisible");
            continue;
        }

        // Remove url anchor and then trailing '/' from link's href.
        // Note that AnchorElement.getHref() returns the absolute URI, so there's no need to
        // worry about relative links.
        String linkHref = StringUtil.findAndReplace(StringUtil.findAndReplace(link.getHref(), "#.*$", ""),
                "\\/$", "");

        // Ignore page link that is empty, not http/https, or same as current window location.
        // If the page link is same as the base URL:
        // - next page link: ignore it, since we would already have seen it.
        // - previous page link: don't ignore it, since some sites will simply have the same
        //                       base URL for the first page.
        if (linkHref.isEmpty() || !StringUtil.match(linkHref, "^https?://")
                || linkHref.equalsIgnoreCase(wndLocationHref)
                || (pageLink == PageLink.NEXT && linkHref.equalsIgnoreCase(baseUrl))) {
            appendDbgStrForLink(link, "ignored: empty or same as current or base url " + baseUrl);
            continue;
        }

        // If it's on a different domain, skip it.
        String[] urlSlashes = StringUtil.split(linkHref, "\\/+");
        if (urlSlashes.length < 3 || // Expect at least the protocol, domain, and path.
                !getLocationHost(original_domain).equalsIgnoreCase(urlSlashes[1])) {
            appendDbgStrForLink(link, "ignored: different domain");
            continue;
        }

        // Use javascript innerText (instead of javascript textContent) to only get visible
        // text.
        String linkText = DomUtil.getInnerText(link);

        // If the linkText looks like it's not the next or previous page, skip it.
        if (StringUtil.match(linkText, EXTRANEOUS_REGEX) || linkText.length() > 25) {
            appendDbgStrForLink(link, "ignored: one of extra");
            continue;
        }

        // For next page link, if the initial part of the URL is identical to the base URL, but
        // the rest of it doesn't contain any digits, it's certainly not a next page link.
        // However, this doesn't apply to previous page link, because most sites will just have
        // the base URL for the first page.
        // TODO(kuan): baseUrl (returned by findBaseUrl()) is NOT the prefix of the current
        // window location, even though it appears to be so the way it's used here.
        // TODO(kuan): do we need to apply this heuristic to previous page links if current page
        // number is not 2?
        if (pageLink == PageLink.NEXT) {
            String linkHrefRemaining = StringUtil.findAndReplace(linkHref, baseUrl, "");
            if (!StringUtil.match(linkHrefRemaining, "\\d")) {
                appendDbgStrForLink(link, "ignored: no number beyond base url " + baseUrl);
                continue;
            }
        }

        PagingLinkObj linkObj = null;
        if (!possiblePages.containsKey(linkHref)) { // Have not encountered this href.
            linkObj = new PagingLinkObj(i, 0, linkText, linkHref);
            possiblePages.put(linkHref, linkObj);
        } else { // Have already encountered this href, append its text to existing entry's.
            linkObj = possiblePages.get(linkHref);
            linkObj.mLinkText += " | " + linkText;
        }

        // If the base URL isn't part of this URL, penalize this link.  It could still be the
        // link, but the odds are lower.
        // Example: http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html.
        // TODO(kuan): again, baseUrl (returned by findBaseUrl()) is NOT the prefix of the
        // current window location, even though it appears to be so the way it's used here.
        if (linkHref.indexOf(baseUrl) != 0) {
            linkObj.mScore -= 25;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": not part of base url " + baseUrl);
        }

        // Concatenate the link text with class name and id, and determine the score based on
        // existence of various paging-related words.
        String linkData = linkText + " " + link.getClassName() + " " + link.getId();
        appendDbgStrForLink(link, "txt+class+id=" + linkData);
        if (StringUtil.match(linkData, pageLink == PageLink.NEXT ? NEXT_LINK_REGEX : PREV_LINK_REGEX)) {
            linkObj.mScore += 50;
            // The ternary must be parenthesized on its own; otherwise " regex" is only
            // appended in the "prev" branch.
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has "
                    + (pageLink == PageLink.NEXT ? "next" : "prev") + " regex");
        }
        if (StringUtil.match(linkData, "pag(e|ing|inat)")) {
            linkObj.mScore += 25;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has pag* word");
        }
        if (StringUtil.match(linkData, "(first|last)")) {
            // -65 is enough to negate any bonuses gotten from a ">" or similar marker in the
            // text.
            // If we already matched on "next", last is probably fine.
            // If we didn't, then it's bad.  Penalize.
            // Same for "prev".
            if ((pageLink == PageLink.NEXT && !StringUtil.match(linkObj.mLinkText, NEXT_LINK_REGEX))
                    || (pageLink == PageLink.PREV && !StringUtil.match(linkObj.mLinkText, PREV_LINK_REGEX))) {
                linkObj.mScore -= 65;
                appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has first|last but no "
                        + (pageLink == PageLink.NEXT ? "next" : "prev") + " regex");
            }
        }
        if (StringUtil.match(linkData, NEGATIVE_REGEX) || StringUtil.match(linkData, EXTRANEOUS_REGEX)) {
            linkObj.mScore -= 50;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has neg or extra regex");
        }
        if (StringUtil.match(linkData, pageLink == PageLink.NEXT ? PREV_LINK_REGEX : NEXT_LINK_REGEX)) {
            linkObj.mScore -= 200;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has opp of "
                    + (pageLink == PageLink.NEXT ? "next" : "prev") + " regex");
        }

        // Check if a parent element contains page or paging or paginate.
        // Each of the positive and negative adjustments is applied at most once per link.
        boolean positiveMatch = false, negativeMatch = false;
        Element parent = link.getParentElement();
        while (parent != null && (!positiveMatch || !negativeMatch)) {
            String parentClassAndId = parent.getClassName() + " " + parent.getId();
            if (!positiveMatch && StringUtil.match(parentClassAndId, "pag(e|ing|inat)")) {
                linkObj.mScore += 25;
                positiveMatch = true;
                appendDbgStrForLink(link, "score=" + linkObj.mScore + ": posParent - " + parentClassAndId);
            }
            // TODO(kuan): to get 1st page for prev page link, this can't be applied; however,
            // the non-application might be the cause of recursive prev page being returned,
            // i.e. for page 1, it may incorrectly return page 3 for prev page link.
            if (!negativeMatch && StringUtil.match(parentClassAndId, NEGATIVE_REGEX)) {
                // If this is just something like "footer", give it a negative.
                // If it's something like "body-and-footer", leave it be.
                if (!StringUtil.match(parentClassAndId, POSITIVE_REGEX)) {
                    linkObj.mScore -= 25;
                    negativeMatch = true;
                    appendDbgStrForLink(link, "score=" + linkObj.mScore + ": negParent - " + parentClassAndId);
                }
            }
            parent = parent.getParentElement();
        }

        // If the URL looks like it has paging in it, add to the score.
        // Things like /page/2/, /pagenum/2, ?p=3, ?page=11, ?pagination=34.
        if (StringUtil.match(linkHref, "p(a|g|ag)?(e|ing|ination)?(=|\\/)[0-9]{1,2}")
                || StringUtil.match(linkHref, "(page|paging)")) {
            linkObj.mScore += 25;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has paging info");
        }

        // If the URL contains negative values, give a slight decrease.
        if (StringUtil.match(linkHref, EXTRANEOUS_REGEX)) {
            linkObj.mScore -= 15;
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": has extra regex");
        }

        // If the link text can be parsed as a number, give it a minor bonus, with a slight bias
        // towards lower numbered pages.  This is so that pages that might not have 'next' in
        // their text can still get scored, and sorted properly by score.
        // TODO(kuan): it might be wrong to assume that it knows about other pages in the
        // document and that it starts on the first page.
        int linkTextAsNumber = 0;
        try {
            linkTextAsNumber = Integer.parseInt(linkText, 10);
        } catch (NumberFormatException ignored) {
            // Link text is not a number: linkTextAsNumber stays 0 and no bonus is applied.
        }
        if (linkTextAsNumber > 0) {
            // Punish 1 since we're either already there, or it's probably before what we
            // want anyway.
            if (linkTextAsNumber == 1) {
                linkObj.mScore -= 10;
            } else {
                linkObj.mScore += Math.max(0, 10 - linkTextAsNumber);
            }
            appendDbgStrForLink(link, "score=" + linkObj.mScore + ": linktxt is a num");
        }
    } // for all links

    // Loop through all of the possible pages from above and find the top candidate for the next
    // page URL.  Require at least a score of 50, which is a relatively high confidence that
    // this page is the next link.
    PagingLinkObj topPage = null;
    for (PagingLinkObj pageObj : possiblePages.values()) {
        if (pageObj.mScore >= 50 && (topPage == null || topPage.mScore < pageObj.mScore)) {
            topPage = pageObj;
        }
    }

    String pagingHref = null;
    if (topPage != null) {
        pagingHref = StringUtil.findAndReplace(topPage.mLinkHref, "\\/$", "");
        appendDbgStrForLink(allLinks.getItem(topPage.mLinkIndex),
                "found: score=" + topPage.mScore + ", txt=[" + topPage.mLinkText + "], " + pagingHref);
    }

    if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_PAGING_INFO)) {
        logDbgInfoToConsole(pageLink, pagingHref, allLinks);
    }

    return pagingHref;
}

From source file:com.dom_distiller.client.PagingLinksFinder.java

License:Open Source License

/**
 * Dumps paging-link debug information to the console: first a summary line with the number of
 * links and the paging href that was found, then one line per link with its href, its
 * whitespace-normalized visible text and the debug string recorded in {@code mLinkDebugInfo}.
 *
 * @param pageLink whether the search was for the next or the previous page link
 * @param pagingHref the href that was found; logged as "null" when {@code null}
 * @param allLinks the links that were processed
 */
private static void logDbgInfoToConsole(PageLink pageLink, String pagingHref, NodeList<Element> allLinks) {
    // Location of logging output is different when running in different modes:
    // - "ant test.dev": test output file.
    // - chrome browser distiller viewer: chrome logfile.
    // TODO(kuan): investigate how to get logging when running "ant test.prod" - currently,
    // nothing appears.  In the meantime, throwing an exception with a log message at suspicious
    // codepoints can produce a call stack and help debugging, albeit tediously.
    String direction = pageLink == PageLink.NEXT ? "next: " : "prev: ";
    String foundHref = pagingHref != null ? pagingHref : "null";
    LogUtil.logToConsole("numLinks=" + allLinks.getLength() + ", found " + direction + foundHref);

    for (int idx = 0; idx < allLinks.getLength(); idx++) {
        AnchorElement anchor = AnchorElement.as(allLinks.getItem(idx));

        // innerText (rather than textContent) yields only the visible text; splitting on
        // whitespace runs and re-joining with single spaces normalizes the whitespace.
        String[] tokens = StringUtil.split(DomUtil.getInnerText(anchor), "\\s+");
        StringBuilder normalized = new StringBuilder();
        for (int t = 0; t < tokens.length; t++) {
            if (t > 0) {
                normalized.append(" ");
            }
            normalized.append(tokens[t]);
        }

        LogUtil.logToConsole(idx + ")" + anchor.getHref() + ", txt=[" + normalized.toString() + "], dbg=["
                + mLinkDebugInfo.get(anchor) + "]");
    }
}

From source file:com.vaadin.client.WidgetUtil.java

License:Apache License

/**
 * Resolve a relative URL to an absolute URL based on the current document's
 * location./*ww w. ja  va  2s  . c om*/
 * 
 * @param url
 *            a string with the relative URL to resolve
 * @return the corresponding absolute URL as a string
 */
public static String getAbsoluteUrl(String url) {
    if (BrowserInfo.get().isIE8()) {
        // The hard way - must use innerHTML and attach to DOM in IE8
        DivElement divElement = Document.get().createDivElement();
        divElement.getStyle().setDisplay(Display.NONE);

        RootPanel.getBodyElement().appendChild(divElement);
        divElement.setInnerHTML("<a href='" + escapeAttribute(url) + "' ></a>");

        AnchorElement a = divElement.getChild(0).cast();
        String href = a.getHref();

        RootPanel.getBodyElement().removeChild(divElement);
        return href;
    } else {
        AnchorElement a = Document.get().createAnchorElement();
        a.setHref(url);
        return a.getHref();
    }
}

From source file:org.chromium.distiller.DomUtil.java

License:Open Source License

/**
 * Rewrites anchor hrefs and video posters to their absolute form, for the root element itself
 * and for all of its descendants, then calls {@code makeAllSrcAttributesAbsolute} and
 * {@code makeSrcSetAbsolute} on the root.
 *
 * @param rootNode The root Node to look through.
 */
public static void makeAllLinksAbsolute(Node rootNode) {
    Element root = Element.as(rootNode);

    // AnchorElement.getHref() and ImageElement.getSrc() both return the absolute URI, so
    // writing the value that was read straight back is sufficient.

    // Anchors: the root first (it is not part of getElementsByTagName), then descendants.
    if ("A".equals(root.getTagName())) {
        AnchorElement rootAnchor = AnchorElement.as(root);
        boolean hasHref = !rootAnchor.getHref().isEmpty();
        if (hasHref) {
            rootAnchor.setHref(rootAnchor.getHref());
        }
    }
    NodeList<Element> anchors = root.getElementsByTagName("A");
    for (int a = 0; a < anchors.getLength(); a++) {
        AnchorElement anchor = AnchorElement.as(anchors.getItem(a));
        if (anchor.getHref().isEmpty()) {
            continue;
        }
        anchor.setHref(anchor.getHref());
    }

    // Videos: same order, root first and then descendants.
    if (root.getTagName().equals("VIDEO")) {
        VideoElement rootVideo = (VideoElement) root;
        boolean hasPoster = !rootVideo.getPoster().isEmpty();
        if (hasPoster) {
            rootVideo.setPoster(rootVideo.getPoster());
        }
    }
    NodeList<Element> videos = root.getElementsByTagName("VIDEO");
    for (int v = 0; v < videos.getLength(); v++) {
        VideoElement video = (VideoElement) videos.getItem(v);
        if (video.getPoster().isEmpty()) {
            continue;
        }
        video.setPoster(video.getPoster());
    }

    makeAllSrcAttributesAbsolute(root);
    makeSrcSetAbsolute(root);
}

From source file:org.chromium.distiller.extractors.embeds.TwitterExtractor.java

License:Open Source License

/**
 * Handle a Twitter embed that has not yet been rendered.
 *
 * <p>The element must carry the "twitter-tweet" class and contain at least one anchor. The
 * last anchor must point to the twitter.com root domain; the tweet id is extracted from that
 * anchor's "pathname" property.
 *
 * @param e The root element of the embed (should be a "blockquote").
 * @return a {@code WebEmbed} of type "twitter" for the tweet id, or null if any check fails.
 */
private WebEmbed handleNotRendered(Element e) {
    // The characteristic class name for Twitter must be present.
    boolean hasTweetClass = e.getClassName().contains("twitter-tweet");
    if (!hasTweetClass) {
        return null;
    }

    // The last anchor element in this section should contain the tweet id.
    NodeList<Element> anchors = e.getElementsByTagName("a");
    int anchorCount = anchors.getLength();
    if (anchorCount == 0) {
        return null;
    }
    AnchorElement lastAnchor = AnchorElement.as(anchors.getItem(anchorCount - 1));

    boolean pointsToTwitter = DomUtil.hasRootDomain(lastAnchor.getHref(), "twitter.com");
    if (!pointsToTwitter) {
        return null;
    }

    // The tweet id is derived from the path component of the anchor's URL.
    String tweetId = getTweetIdFromPath(lastAnchor.getPropertyString("pathname"));
    return tweetId == null ? null : new WebEmbed(e, "twitter", tweetId, null);
}

From source file:org.chromium.distiller.PageParameterParser.java

License:Open Source License

/**
 * Resolves the "href" attribute of {@code link} by assigning it to {@code baseAnchor} and
 * reading back {@code baseAnchor.getHref()}.
 *
 * @param link the anchor whose "href" attribute is resolved
 * @param baseAnchor the anchor used for resolution; its "href" attribute is overwritten unless
 *            the attribute of {@code link} is empty
 * @return the resolved href, or the empty string when {@code link} has an empty "href"
 *         attribute
 */
private static String resolveLinkHref(AnchorElement link, AnchorElement baseAnchor) {
    String rawHref = link.getAttribute("href");
    if (!rawHref.isEmpty()) {
        baseAnchor.setAttribute("href", rawHref);
        return baseAnchor.getHref();
    }
    // Anchors without "href" attribute are not considered potential pagination links.
    return "";
}

From source file:org.chromium.distiller.PagingLinksFinder.java

License:Open Source License

/**
 * Resolves {@code linkHref} by assigning it as the "href" attribute of {@code baseAnchor} and
 * reading the value back through {@code AnchorElement.getHref()}.
 *
 * @param linkHref the href value to resolve
 * @param baseAnchor the anchor used for resolution; its "href" attribute is overwritten
 * @return the value of {@code baseAnchor.getHref()} after the assignment
 */
public static String resolveLinkHref(String linkHref, AnchorElement baseAnchor) {
    baseAnchor.setAttribute("href", linkHref);
    final String resolvedHref = baseAnchor.getHref();
    return resolvedHref;
}

From source file:org.chromium.distiller.PagingLinksFinder.java

License:Open Source License

/**
 * Logs the following to the console:
 * <ul>
 * <li>the number of links processed and the next or previous page link found;</li>
 * <li>for each link: its href, its visible text with whitespace runs collapsed to single
 * spaces, and the debug string stored for it in {@code mLinkDebugInfo}.</li>
 * </ul>
 *
 * @param pageLink whether the search was for the next or the previous page link
 * @param pagingHref the paging href that was found; logged as "null" when {@code null}
 * @param allLinks the links that were processed
 */
private static void logDbgInfoToConsole(PageLink pageLink, String pagingHref, NodeList<Element> allLinks) {
    // Location of logging output is different when running in different modes:
    // - "ant test.dev": test output file.
    // - chrome browser distiller viewer: chrome logfile.
    // TODO(kuan): investigate how to get logging when running "ant test.prod" - currently,
    // nothing appears.  In the meantime, throwing an exception with a log message at suspicious
    // codepoints can produce a call stack and help debugging, albeit tediously.
    String summary = "numLinks=" + allLinks.getLength() + ", found "
            + (pageLink == PageLink.NEXT ? "next: " : "prev: ")
            + (pagingHref != null ? pagingHref : "null");
    LogUtil.logToConsole(summary);

    for (int linkIdx = 0; linkIdx < allLinks.getLength(); linkIdx++) {
        AnchorElement anchor = AnchorElement.as(allLinks.getItem(linkIdx));

        // Use javascript innerText (instead of javascript textContent) to get only visible
        // text, then collapse every run of white space into a single space.
        String visibleText = DomUtil.getInnerText(anchor);
        String[] pieces = StringUtil.split(visibleText, "\\s+");
        StringBuilder joined = new StringBuilder();
        int lastPiece = pieces.length - 1;
        for (int p = 0; p <= lastPiece; p++) {
            joined.append(pieces[p]);
            if (p != lastPiece) {
                joined.append(" ");
            }
        }

        String line = linkIdx + ")" + anchor.getHref() + ", txt=[" + joined.toString() + "], dbg=["
                + mLinkDebugInfo.get(anchor) + "]";
        LogUtil.logToConsole(line);
    }
}