Example usage for com.google.common.net InternetDomainName isTopPrivateDomain

List of usage examples for com.google.common.net InternetDomainName isTopPrivateDomain

Introduction

On this page you can find example usages of com.google.common.net InternetDomainName isTopPrivateDomain.

Prototype

public boolean isTopPrivateDomain() 

Source Link

Document

Indicates whether this domain name is composed of exactly one subdomain component followed by a public suffix (see isPublicSuffix()).

Usage

From source file:uk.bl.wa.extract.LinkExtractor.java

/**
 * Reduces a host name to its private domain, i.e. the single label that sits
 * directly beneath the public suffix plus that suffix. Falls back on the host
 * itself when it cannot be parsed as a domain name.
 * <p>
 * Names whose last label is {@code uk} are special-cased: the result is the
 * last three labels of the name (or the whole name when it has fewer than
 * three), because the public-suffix data was observed not to know about the
 * various UK second-level domains.
 * <p>
 * Names that are neither a top private domain nor under a public suffix are
 * returned whole.
 *
 * @param host
 *            the host name to reduce; may be {@code null}
 * @return the private domain as rendered by
 *         {@code InternetDomainName.toString()}, the unmodified {@code host}
 *         if it could not be parsed, or {@code null} if {@code host} is
 *         {@code null}
 */
public static String extractPrivateSuffixFromHost(String host) {
    if (host == null) {
        return null;
    }
    InternetDomainName domainName;
    try {
        domainName = InternetDomainName.from(host);
    } catch (Exception e) {
        // Deliberately broad: this method is best-effort, so any parsing
        // failure simply hands the caller's value back untouched.
        return host;
    }

    ImmutableList<String> parts = domainName.parts();
    int count = parts.size();

    // Decide on the parsed labels rather than the raw string: the raw host
    // may differ in case (e.g. "WWW.BL.UK"), which previously made the same
    // domain take the non-UK branch and yield a different answer.
    if ("uk".equals(parts.get(count - 1))) {
        if (count < 3) {
            // Too short to apply the three-level assumption.
            return domainName.toString();
        }
        InternetDomainName lastThree = InternetDomainName
                .from(parts.get(count - 3) + "." + parts.get(count - 2) + "." + parts.get(count - 1));
        return lastThree.toString();
    }

    // topPrivateDomain() is only legal when one of these holds.
    if (domainName.isTopPrivateDomain() || domainName.isUnderPublicSuffix()) {
        return domainName.topPrivateDomain().toString();
    }
    return domainName.toString();
}

From source file:com.addthis.hydra.data.filter.bundle.BundleFilterURL.java

/**
 * Parses the URL stored in {@code field} and copies its components into
 * whichever of the optional target fields ({@code setHost}, {@code setPath},
 * {@code setParams}, {@code setAnchor}, {@code setHostNormal},
 * {@code setTopPrivateDomain}) are configured.
 * <p>
 * Unless {@code asFile} is set, the value must be at least 7 characters long
 * and start with "http"; with {@code fixProto} a missing protocol is repaired
 * by prefixing "http://". With {@code clean}, url-encoded values are decoded
 * and the (possibly slash-terminated) URL is written back to {@code field}.
 * If parsing fails and the value contains both "%3" and "%2", it is
 * url-decoded and parsing is attempted once more.
 *
 * @param bundle
 *            the bundle the URL is read from and the extracted parts are
 *            written to
 * @return {@code invalidExit} when the value is missing, too short, lacks the
 *         "http" prefix (and {@code fixProto} is off) or is malformed;
 *         {@code true} otherwise
 */
@Override
public boolean filter(Bundle bundle) {
    String pv = ValueUtil.asNativeString(field.getValue(bundle));
    if (!asFile) {
        if (pv == null || pv.length() < 7) {
            return invalidExit;
        }
        String lpv = pv.trim().toLowerCase();
        if (!(lpv.startsWith("http"))) {
            if (fixProto) {
                if (clean && lpv.indexOf("%2f") >= 0) {
                    pv = LessBytes.urldecode(pv);
                }
                pv = "http://".concat(pv);
            } else {
                return invalidExit;
            }
        }
        // lpv is the value as it was before any repair above, so this only
        // fires for input that itself started with an encoded protocol.
        if (clean && (lpv.startsWith("http%") || lpv.startsWith("https%"))) {
            pv = LessBytes.urldecode(pv);
        }
    }
    // up to two 'decoding' passes on the url to try and find a valid one
    // NOTE(review): there is no break/return after a successful parse, so a
    // URL that parses on the first pass is processed again on the second.
    // Left as-is because the second pass can change what is stored (e.g. the
    // path after a slash was appended) -- confirm whether that is intended.
    for (int i = 0; i < 2; i++) {
        if (pv == null) {
            return invalidExit;
        }
        try {
            URL urec = asFile ? new URL("file://".concat(pv)) : new URL(pv);
            String urlhost = urec.getHost();
            String returnhost;
            if (resolveIP) {
                synchronized (iphost) {
                    // Look up first and only then transform: calling
                    // toLowerCase() directly on the lookup result threw a
                    // NullPointerException on every cache miss, so the
                    // resolve-and-cache branch below could never run.
                    returnhost = iphost.get(urlhost);
                    if (returnhost == null) {
                        returnhost = resolveDottedIP(urlhost);
                        iphost.put(urlhost, returnhost);
                        if (iphost.size() > maxhostcache) {
                            iphost.removeEldest();
                        }
                    }
                }
                if (returnhost != null) {
                    returnhost = returnhost.toLowerCase();
                }
            } else {
                returnhost = urlhost.toLowerCase();
            }
            // store cleaned up (url decoded) version back to packet
            if (clean) {
                if (urec.getPath().isEmpty()) {
                    // if the path element is empty, append the slash
                    pv = pv.concat("/");
                }
                field.setValue(bundle, ValueFactory.create(pv));
            }
            if (setHost != null) {
                if (toBaseDomain) {
                    returnhost = NetUtil.getBaseDomain(returnhost);
                } else if (toTopPrivateDomain) {
                    if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                        InternetDomainName domain = InternetDomainName.from(returnhost);
                        if (domain.hasPublicSuffix() && domain.isUnderPublicSuffix()) {
                            InternetDomainName topPrivateDomain = domain.topPrivateDomain();
                            returnhost = topPrivateDomain.toString();
                        }
                    }
                }
                setHost.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setPath != null) {
                setPath.setValue(bundle, ValueFactory.create(urec.getPath()));
            }
            if (setParams != null) {
                setParams.setValue(bundle, ValueFactory.create(urec.getQuery()));
            }
            if (setAnchor != null) {
                setAnchor.setValue(bundle, ValueFactory.create(urec.getRef()));
            }
            // Note: the steps below operate on returnhost as already rewritten
            // by the setHost step above (when that step is configured).
            if (setHostNormal != null) {
                // Same null guard as the toTopPrivateDomain branch uses;
                // Pattern.matcher(null) would otherwise throw.
                if (returnhost != null) {
                    Matcher m = hostNormalPattern.matcher(returnhost);
                    if (m.find()) {
                        returnhost = m.group(1);
                    }
                }
                setHostNormal.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setTopPrivateDomain != null) {
                String topDomain = returnhost;
                if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                    InternetDomainName domainName = InternetDomainName.from(returnhost);
                    // topPrivateDomain() is only legal when one of these holds.
                    if (domainName.isTopPrivateDomain() || domainName.isUnderPublicSuffix()) {
                        topDomain = DOT_JOINER.join(domainName.topPrivateDomain().parts());
                    }
                }
                setTopPrivateDomain.setValue(bundle, ValueFactory.create(topDomain));
            }
        } catch (MalformedURLException e) {
            // Looks url-encoded: decode and let the next pass try again.
            if (pv.indexOf("%3") > 0 && pv.indexOf("%2") > 0) {
                pv = LessBytes.urldecode(pv);
            } else {
                if (debugMalformed) {
                    System.err.println("malformed(" + i + ") " + pv);
                }
                return invalidExit;
            }
        }
    }
    // NOTE(review): this is also reached when both passes were malformed but
    // still looked encoded, i.e. without anything having been extracted.
    return true;
}