Example usage for com.google.common.net InternetDomainName isUnderPublicSuffix

List of usage examples for com.google.common.net InternetDomainName isUnderPublicSuffix

Introduction

On this page you can find example usages of com.google.common.net.InternetDomainName.isUnderPublicSuffix().

Prototype

public boolean isUnderPublicSuffix() 

Source Link

Document

Indicates whether this domain name ends in a public suffix (see isPublicSuffix()), while not being a public suffix itself.

Usage

From source file:org.sindice.core.analytics.commons.util.URIUtil.java

/**
 * Return the second-level domain name. Returns null if the domain is not valid.
 * This method normalises domain names by removing the leading www sub-domain,
 * if present./*from w  w  w .  j a  v a  2s .c  o  m*/
 * @param domain
 * @return
 */
public static String getSndDomain(String domain) {
    if (domain == null) {
        return null;
    }
    // Remove www subdomain if it exists
    if (domain.startsWith("www.")) {
        domain = domain.substring(4);
    }
    if (InternetDomainName.isValid(domain)) { // the domain is valid according to the RFC3490
        final InternetDomainName idn = InternetDomainName.from(domain);
        if (idn.hasPublicSuffix()) { // the domain has a public suffix
            if (idn.isUnderPublicSuffix()) {
                return idn.topPrivateDomain().name();
            } else if (idn.hasParent()) {
                final List<String> parts = idn.parts();
                return parts.get(parts.size() - 2).concat(".").concat(parts.get(parts.size() - 1));
            }
        }
    }
    return null;
}

From source file:uk.bl.wa.extract.LinkExtractor.java

/**
 * Attempts to reduce a host name to its private domain, falling back on the
 * host itself when it cannot be parsed as a domain name.
 *
 * @param host the host name to reduce; may be {@code null}
 * @return {@code null} for a {@code null} host; the unmodified host when it
 *         cannot be parsed; otherwise the derived domain as a string
 */
public static String extractPrivateSuffixFromHost(String host) {
    if (host == null) {
        return null;
    }
    final InternetDomainName parsed;
    try {
        parsed = InternetDomainName.from(host);
    } catch (Exception e) {
        // Not parseable as a domain name: hand the host back untouched.
        return host;
    }

    // It appears the IDN class does not know about the various UK
    // second-level domains, so UK hosts are handled by assuming three levels.
    if (host.endsWith(".uk")) {
        final ImmutableList<String> labels = parsed.parts();
        final int count = labels.size();
        if (count < 3) {
            // Too few labels to take three levels: fall back on the parsed host.
            return parsed.toString();
        }
        final String lastThree = labels.get(count - 3) + "." + labels.get(count - 2) + "."
                + labels.get(count - 1);
        return InternetDomainName.from(lastThree).toString();
    }

    if (parsed.isTopPrivateDomain() || parsed.isUnderPublicSuffix()) {
        return parsed.topPrivateDomain().toString();
    }
    return parsed.toString();
}

From source file:google.registry.flows.host.HostFlowUtils.java

/**
 * Checks that a host name is valid.
 *
 * <p>The name must be at most 253 characters long, already lower-cased,
 * already puny-coded, already in the form {@link InternetDomainName}
 * renders it, and deep enough (see the comments in the body).
 *
 * @param name the host name to validate; must not be null
 * @return the parsed host name when every check passes
 * @throws EppException when any of the checks fails
 */
static InternetDomainName validateHostName(String name) throws EppException {
    checkArgumentNotNull(name, "Must specify host name to validate");
    if (name.length() > 253) {
        throw new HostNameTooLongException();
    }
    final String lowerCased = Ascii.toLowerCase(name);
    if (!name.equals(lowerCased)) {
        throw new HostNameNotLowerCaseException(lowerCased);
    }
    try {
        final String punyCoded = Idn.toASCII(name);
        if (!name.equals(punyCoded)) {
            throw new HostNameNotPunyCodedException(punyCoded);
        }
        final InternetDomainName hostName = InternetDomainName.from(name);
        final String normalized = hostName.toString();
        if (!name.equals(normalized)) {
            throw new HostNameNotNormalizedException(normalized);
        }
        // Checks whether a hostname is deep enough. Technically a host can be just one under a
        // public suffix (e.g. example.com) but we require by policy that it has to be at least one
        // part beyond that (e.g. ns1.example.com). The public suffix list includes all current
        // ccTlds, so this check requires 4+ parts if it's a ccTld that doesn't delegate second
        // level domains, such as .co.uk. But the list does not include new tlds, so in that case
        // we just ensure 3+ parts. In the particular case where our own tld has a '.' in it, we know
        // that there need to be 4 parts as well.
        final boolean deepEnough;
        if (hostName.isUnderPublicSuffix()) {
            deepEnough = hostName.parent().isUnderPublicSuffix();
        } else {
            // We need to know how many parts the hostname has beyond the public suffix, but we don't
            // know what the public suffix is. If the host is in bailiwick and we are hosting a
            // multipart "tld" like .co.uk the public suffix might be 2 parts. Otherwise it's an
            // unrecognized tld that's not on the public suffix list, so assume the tld alone is the
            // public suffix.
            final Optional<InternetDomainName> tldParsed = findTldForName(hostName);
            final int suffixSize = tldParsed.isPresent() ? tldParsed.get().parts().size() : 1;
            deepEnough = hostName.parts().size() >= suffixSize + 2;
        }
        if (!deepEnough) {
            throw new HostNameTooShallowException();
        }
        return hostName;
    } catch (IllegalArgumentException e) {
        throw new InvalidHostNameException();
    }
}

From source file:focusedCrawler.util.LinkRelevance.java

/**
 * Returns the top private domain of this link's domain name when that name
 * lies under a public suffix; otherwise returns the domain name itself.
 *
 * @return the selected domain rendered as a string
 * @throws IllegalStateException if anything fails while deriving the name;
 *         the original exception is attached as the cause
 */
public String getTopLevelDomainName() {
    final InternetDomainName domain = this.getDomainName();
    try {
        // if the domain is not under a public suffix, just use it as top level domain
        final InternetDomainName selected = domain.isUnderPublicSuffix() ? domain.topPrivateDomain() : domain;
        return selected.toString();
    } catch (Exception e) {
        final String message = "Invalid top private domain name=[" + domain + "] in URL=[" + url + "]";
        throw new IllegalStateException(message, e);
    }
}

From source file:com.addthis.hydra.data.filter.bundle.BundleFilterURL.java

/**
 * Parses the URL held in {@code field} and copies its components into the
 * configured output fields.
 *
 * <p>Unless {@code asFile} is set, the value must be at least 7 characters
 * long and start with {@code http}; when {@code fixProto} is set a missing
 * protocol is repaired by prepending {@code http://}. Parsing is attempted
 * up to twice, URL-decoding the value between attempts when it looks
 * percent-encoded.
 *
 * @param bundle the bundle to read the URL from and write the results to
 * @return {@code invalidExit} when the value is missing, too short, lacks a
 *         protocol that may not be fixed, or cannot be parsed; {@code true}
 *         otherwise
 */
@Override
public boolean filter(Bundle bundle) {
    String pv = ValueUtil.asNativeString(field.getValue(bundle));
    if (!asFile) {
        if (pv == null || pv.length() < 7) {
            return invalidExit;
        }
        String lpv = pv.trim().toLowerCase();
        if (!(lpv.startsWith("http"))) {
            if (fixProto) {
                if (clean && lpv.indexOf("%2f") >= 0) {
                    pv = LessBytes.urldecode(pv);
                }
                pv = "http://".concat(pv);
            } else {
                return invalidExit;
            }
        }
        if (clean && (lpv.startsWith("http%") || lpv.startsWith("https%"))) {
            pv = LessBytes.urldecode(pv);
        }
    }
    // up to two 'decoding' passes on the url to try and find a valid one
    // NOTE(review): a successful first pass does not leave the loop, so the
    // second pass repeats the same work — confirm whether that is intended.
    for (int i = 0; i < 2; i++) {
        if (pv == null) {
            return invalidExit;
        }
        try {
            URL urec = asFile ? new URL("file://".concat(pv)) : new URL(pv);
            String urlhost = urec.getHost();
            String returnhost = null;
            if (resolveIP) {
                synchronized (iphost) {
                    // Check for a cache miss BEFORE touching the value: calling
                    // toLowerCase() directly on the lookup result threw a
                    // NullPointerException on every miss, so the resolve-and-cache
                    // branch below could never run.
                    returnhost = iphost.get(urlhost);
                    if (returnhost == null) {
                        returnhost = resolveDottedIP(urlhost);
                        iphost.put(urlhost, returnhost);
                        if (iphost.size() > maxhostcache) {
                            iphost.removeEldest();
                        }
                    }
                }
                // Lower-case as the non-resolving branch does; guarded in case
                // resolution produced no host.
                if (returnhost != null) {
                    returnhost = returnhost.toLowerCase();
                }
            } else {
                returnhost = urlhost.toLowerCase();
            }
            // store cleaned up (url decoded) version back to packet
            if (clean) {
                if (urec != null && urec.getPath().isEmpty()) {
                    // if the path element is empty, append the slash
                    pv = pv.concat("/");
                }
                field.setValue(bundle, ValueFactory.create(pv));
            }
            if (setHost != null) {
                if (toBaseDomain) {
                    returnhost = NetUtil.getBaseDomain(returnhost);
                } else if (toTopPrivateDomain) {
                    if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                        InternetDomainName domain = InternetDomainName.from(returnhost);
                        if (domain.hasPublicSuffix() && domain.isUnderPublicSuffix()) {
                            InternetDomainName topPrivateDomain = domain.topPrivateDomain();
                            returnhost = topPrivateDomain.toString();
                        }
                    }
                }
                setHost.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setPath != null) {
                setPath.setValue(bundle, ValueFactory.create(urec.getPath()));
            }
            if (setParams != null) {
                setParams.setValue(bundle, ValueFactory.create(urec.getQuery()));
            }
            if (setAnchor != null) {
                setAnchor.setValue(bundle, ValueFactory.create(urec.getRef()));
            }
            if (setHostNormal != null) {
                // Keep only the first capture group of the normalising pattern when it matches.
                Matcher m = hostNormalPattern.matcher(returnhost);
                if (m.find()) {
                    returnhost = m.group(1);
                }
                setHostNormal.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setTopPrivateDomain != null) {
                // Falls back on the host as-is when no top private domain can be derived.
                String topDomain = returnhost;
                if (InternetDomainName.isValid(returnhost)) {
                    InternetDomainName domainName = InternetDomainName.from(returnhost);
                    if (domainName.isTopPrivateDomain() || domainName.isUnderPublicSuffix()) {
                        topDomain = DOT_JOINER.join(domainName.topPrivateDomain().parts());
                    }
                }
                setTopPrivateDomain.setValue(bundle, ValueFactory.create(topDomain));
            }
        } catch (MalformedURLException e) {
            // Looks percent-encoded: decode and let the next pass retry.
            if (pv.indexOf("%3") > 0 && pv.indexOf("%2") > 0) {
                pv = LessBytes.urldecode(pv);
            } else {
                if (debugMalformed) {
                    System.err.println("malformed(" + i + ") " + pv);
                }
                return invalidExit;
            }
        }
    }
    return true;
}

From source file:org.apache.commons.httpclient.cookie.CookieSpecBase.java

/**
 * Return an array of {@link Cookie}s that should be submitted with a
 * request with given attributes, or <tt>null</tt> if no cookie map is
 * supplied.
 *
 * If the SortedMap comes from an HttpState and is not itself
 * thread-safe, it may be necessary to synchronize on the HttpState
 * instance to protect against concurrent modification.
 *
 * @param host the host to which the request is being submitted
 * @param port the port to which the request is being submitted (currently
 * ignored)
 * @param path the path to which the request is being submitted
 * @param secure <tt>true</tt> if the request is using a secure protocol
 * @param cookies SortedMap of <tt>Cookie</tt>s to be matched
 * @return an array of <tt>Cookie</tt>s matching the criterium (possibly
 * empty), or <tt>null</tt> when <tt>cookies</tt> is <tt>null</tt>
 */
@Override
public Cookie[] match(String host, int port, String path, boolean secure,
        final SortedMap<String, Cookie> cookies) {

    LOG.trace("enter CookieSpecBase.match(" + "String, int, String, boolean, SortedMap)");

    if (cookies == null) {
        return null;
    }
    List<Cookie> matching = new LinkedList<Cookie>();
    InternetDomainName domain;
    try {
        domain = InternetDomainName.fromLenient(host);
    } catch (IllegalArgumentException e) {
        // Host is not parseable as a domain name: only the raw host is tried.
        domain = null;
    }

    // Start at the full host, then walk up one parent at a time for as long
    // as the current domain is still under a public suffix.
    String candidate = (domain != null) ? domain.name() : host;
    while (candidate != null) {
        Iterator<Cookie> iter = cookies.subMap(candidate, candidate + Cookie.DOMAIN_OVERBOUNDS).values()
                .iterator();
        try {
            while (iter.hasNext()) {
                Cookie cookie = iter.next();
                if (match(host, port, path, secure, cookie)) {
                    addInPathOrder(matching, cookie);
                }
            }
        } finally {
            // Close the iterator even when matching throws, so it is never
            // left open on an error path.
            StoredIterator.close(iter);
        }
        if (domain != null && domain.isUnderPublicSuffix()) {
            domain = domain.parent();
            candidate = domain.name();
        } else {
            candidate = null;
        }
    }

    return matching.toArray(new Cookie[matching.size()]);
}