Example usage for com.google.common.net InternetDomainName topPrivateDomain

List of usage examples for com.google.common.net InternetDomainName topPrivateDomain

Introduction

On this page you can find usage examples for the topPrivateDomain method of com.google.common.net.InternetDomainName.

Prototype

public InternetDomainName topPrivateDomain() 

Source Link

Document

Returns the portion of this domain name that is one level beneath the public suffix.

Usage

From source file:edu.uga.cs.fluxbuster.utils.DomainNameUtils.java

/**
 * Extracts the effective second level domain name, i.e. the top private
 * domain reported by {@code InternetDomainName.topPrivateDomain()}.
 *
 * @param domainname the full domain name
 * @return the second level domain name, or null if it could not be extracted
 */
public static String extractEffective2LD(String domainname) {
    try {
        return InternetDomainName.from(domainname).topPrivateDomain().name();
    } catch (Exception e) {
        // Best effort: failures are only reported at debug level and
        // signalled to the caller through a null result.
        if (log.isDebugEnabled()) {
            log.debug("Unable to extract 2LD.", e);
        }
        return null;
    }
}

From source file:com.evidon.areweprivateyet.AnalysisUtils.java

/**
 * Reduces a URL to its top private domain using Guava's
 * {@code InternetDomainName}.
 *
 * <p>The URL is first cleaned up: it is cut at the first {@code #}, {@code ?},
 * {@code ;} or {@code |} (when that character is not the first one), and
 * underscores, percent signs and a {@code :8080} port are removed. The host
 * of the cleaned URL is then resolved to its top private domain.
 *
 * <p>If Guava reports an {@code IllegalStateException}, the result of
 * {@code AnalysisUtils.getBaseDomain} is returned instead. If the host is not
 * a valid domain name, the scheme and path are stripped and the remainder is
 * returned when it looks like a dotted-quad IPv4 address.
 *
 * @param url the URL to analyse
 * @return the top private domain, the base domain, or the IPv4 address
 * @throws Exception if the URL cannot be parsed, or if the host is neither a
 *         valid domain name nor a dotted-quad IPv4 address
 */
public static String getGuavaDomain(String url) throws Exception {
    // Cut the URL at the first fragment, query, path-parameter or pipe marker.
    int cut = url.indexOf("#");
    if (cut > 0) {
        url = url.substring(0, cut);
    }

    cut = url.indexOf("?");
    if (cut > 0) {
        url = url.substring(0, cut);
    }

    cut = url.indexOf(";");
    if (cut > 0) {
        url = url.substring(0, cut);
    }

    cut = url.indexOf("|");
    if (cut > 0) {
        url = url.substring(0, cut);
    }

    // Drop characters that would make the URI or host name unparsable.
    if (url.indexOf("_") > 0) {
        url = url.replace("_", "");
    }

    if (url.indexOf("%") > 0) {
        url = url.replace("%", "");
    }

    // strip port
    if (url.indexOf(":8080") > 0) {
        url = url.replace(":8080", "");
    }

    String host = new URI(url).getHost();
    try {
        InternetDomainName domainName = InternetDomainName.from(host);
        return domainName.topPrivateDomain().name();
    } catch (java.lang.IllegalStateException e) {
        return AnalysisUtils.getBaseDomain(url);
    } catch (java.lang.IllegalArgumentException e) {
        // Not a valid domain name: check whether it is a plain IPv4 address.
        // The prefix length is taken from the literal so that "https://"
        // (8 characters) is stripped completely, not just its first 7.
        if (url.startsWith("https://")) {
            url = url.substring("https://".length());
        }

        if (url.startsWith("http://")) {
            url = url.substring("http://".length());
        }

        int slash = url.indexOf("/");
        if (slash > 0) {
            url = url.substring(0, slash);
        }

        if (url.matches("^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}$")) {
            return url;
        } else {
            throw new Exception("Unable to determine a domain for: " + url, e);
        }
    }
}

From source file:org.sindice.core.analytics.commons.util.URIUtil.java

/**
 * Return the second-level domain name. Returns null if the domain is not valid.
 * This method normalises domain names by removing the leading www sub-domain,
 * if present./*ww  w.  j a  va 2 s .  co m*/
 * @param domain
 * @return
 */
public static String getSndDomain(String domain) {
    if (domain == null) {
        return null;
    }
    // Remove www subdomain if it exists
    if (domain.startsWith("www.")) {
        domain = domain.substring(4);
    }
    if (InternetDomainName.isValid(domain)) { // the domain is valid according to the RFC3490
        final InternetDomainName idn = InternetDomainName.from(domain);
        if (idn.hasPublicSuffix()) { // the domain has a public suffix
            if (idn.isUnderPublicSuffix()) {
                return idn.topPrivateDomain().name();
            } else if (idn.hasParent()) {
                final List<String> parts = idn.parts();
                return parts.get(parts.size() - 2).concat(".").concat(parts.get(parts.size() - 1));
            }
        }
    }
    return null;
}

From source file:uk.bl.wa.extract.LinkExtractor.java

/**
 * Attempts to reduce a host name to its private domain, falling back on the
 * host itself when it cannot be parsed as a domain name.
 *
 * @param host the host name, may be null
 * @return the private domain, the parsed domain, or the unparsed host; null
 *         if host is null
 */
public static String extractPrivateSuffixFromHost(String host) {
    if (host == null) {
        return null;
    }
    InternetDomainName parsed;
    try {
        parsed = InternetDomainName.from(host);
    } catch (Exception e) {
        // Not parseable as a domain name: hand the host back untouched.
        return host;
    }

    // It appears the IDN class does not know about the various UK
    // second-level domains, so UK hosts are handled by assuming that the
    // private domain is made of the last three labels.
    if (host.endsWith(".uk")) {
        ImmutableList<String> labels = parsed.parts();
        int count = labels.size();
        if (count < 3) {
            // Too short for the three-level assumption: use the domain as is.
            return parsed.toString();
        }
        String lastThree = labels.get(count - 3) + "." + labels.get(count - 2) + "."
                + labels.get(count - 1);
        return InternetDomainName.from(lastThree).toString();
    }

    if (parsed.isTopPrivateDomain() || parsed.isUnderPublicSuffix()) {
        return parsed.topPrivateDomain().toString();
    }
    return parsed.toString();
}

From source file:org.archive.porky.ExtractTopPrivateDomainFromHostNameUDF.java

/**
 * Returns the top private domain of the host name held in the first field of
 * the tuple.
 *
 * @param input the tuple whose first field is the host name
 * @return the top private domain; "other" if it cannot be determined; null
 *         if the tuple is null or empty
 */
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        String hostName = (String) input.get(0);
        return InternetDomainName.from(hostName).topPrivateDomain().name();
    } catch (Exception e) {
        // Anything that is not a resolvable domain is bucketed as "other".
        return "other";
    }
}

From source file:com.jaeksoft.searchlib.analysis.filter.domain.DomainTldTokenFilter.java

/**
 * Advances to the next token and replaces its term, read as a URL, with the
 * top private domain of the URL's host.
 *
 * <p>When the term cannot be converted, the method returns false if
 * {@code silent} is set and otherwise propagates the failure.
 *
 * @return true if a token was produced; false if the input is exhausted or
 *         the term could not be converted in silent mode
 * @throws IOException if the term is not a valid URL and {@code silent} is
 *         not set
 */
@Override
public final boolean incrementToken() throws IOException {
    if (!input.incrementToken())
        return false;
    try {
        URL url = LinkUtils.newEncodedURL(termAtt.toString());
        InternetDomainName domainName = InternetDomainName.from(url.getHost());
        // Compute the replacement before touching the term, so that a failure
        // does not leave the term attribute emptied.
        String topPrivateDomain = domainName.topPrivateDomain().toString();
        termAtt.setEmpty();
        termAtt.append(topPrivateDomain);
    } catch (MalformedURLException e) {
        if (silent)
            return false;
        throw e;
    } catch (IllegalArgumentException e) {
        if (silent)
            return false;
        throw e;
    } catch (IllegalStateException e) {
        // topPrivateDomain() signals a host without a public suffix this way;
        // honour silent mode for it like for any other unusable host.
        if (silent)
            return false;
        throw e;
    } catch (URISyntaxException e) {
        if (silent)
            return false;
        throw new IOException(e);
    }
    return true;
}

From source file:focusedCrawler.util.LinkRelevance.java

/**
 * Returns the top private domain of this link's domain name, or the domain
 * name itself when it is not under a public suffix.
 *
 * @return the top private domain, or the full domain name as a fallback
 * @throws IllegalStateException if the domain name cannot be processed
 */
public String getTopLevelDomainName() {
    final InternetDomainName domain = this.getDomainName();
    try {
        // if the domain is a public suffix, just use it as top level domain
        final InternetDomainName effective = domain.isUnderPublicSuffix() ? domain.topPrivateDomain() : domain;
        return effective.toString();
    } catch (Exception e) {
        throw new IllegalStateException("Invalid top private domain name=[" + domain + "] in URL=[" + url + "]",
                e);
    }
}

From source file:org.mayocat.multitenancy.DefaultTenantResolver.java

/**
 * Extracts the slug from a host name, i.e. the part of the host that
 * precedes the root domain.
 *
 * <p>The root domain comes from the site settings when a site name is
 * configured. Otherwise it is derived from the host: the top private domain
 * when the host is under a public suffix, else everything after the first
 * dot, else the host itself.
 *
 * @param host the host name to analyse
 * @return the part of the host before {@code "." + rootDomain}, or the whole
 *         host when that suffix is not found after the first character
 */
private String extractSlugFromHost(String host) {
    String rootDomain;
    String siteName = siteSettings.getWebDomainName().or(siteSettings.getDomainName());
    if (Strings.emptyToNull(siteName) == null) {
        InternetDomainName domainName = InternetDomainName.from(host);
        int firstDot = host.indexOf(".");
        if (domainName.isUnderPublicSuffix()) {
            // Host sits below a public suffix: use the suffix + first child.
            // isUnderPublicSuffix() is the precondition of topPrivateDomain();
            // hasPublicSuffix() alone is also true for a bare public suffix,
            // for which topPrivateDomain() throws IllegalStateException.
            rootDomain = domainName.topPrivateDomain().name();
        } else if (firstDot > 0) {
            // Otherwise, best guess : strip everything before the first dot.
            rootDomain = host.substring(firstDot + 1);
        } else {
            rootDomain = host;
        }
    } else {
        // NOTE(review): siteName prefers the web domain name, but the root
        // domain is read from getDomainName() only - confirm this is intended.
        rootDomain = StringUtils.substringBefore(siteSettings.getDomainName(), ":");
    }
    int rootStart = host.indexOf("." + rootDomain);
    if (rootStart > 0) {
        return host.substring(0, rootStart);
    } else {
        return host;
    }
}

From source file:com.addthis.hydra.data.filter.bundle.BundleFilterURL.java

/**
 * Parses the URL held in {@code field} and writes its components (host,
 * path, query, anchor, normalised host, top private domain) to whichever
 * output fields are configured.
 *
 * <p>Unless {@code asFile} is set, values shorter than 7 characters or not
 * starting with "http" are rejected, or prefixed with "http://" when
 * {@code fixProto} is set. A malformed URL that contains both "%3" and "%2"
 * is url-decoded and parsed again, for at most two passes.
 *
 * @param bundle the bundle to read the URL from and write the parts to
 * @return {@code invalidExit} when the value is missing, rejected or
 *         malformed; true otherwise
 */
@Override
public boolean filter(Bundle bundle) {
    String pv = ValueUtil.asNativeString(field.getValue(bundle));
    if (!asFile) {
        if (pv == null || pv.length() < 7) {
            return invalidExit;
        }
        String lpv = pv.trim().toLowerCase();
        if (!(lpv.startsWith("http"))) {
            if (fixProto) {
                if (clean && lpv.indexOf("%2f") >= 0) {
                    pv = LessBytes.urldecode(pv);
                }
                pv = "http://".concat(pv);
            } else {
                return invalidExit;
            }
        }
        if (clean && (lpv.startsWith("http%") || lpv.startsWith("https%"))) {
            pv = LessBytes.urldecode(pv);
        }
    }
    // up to two 'decoding' passes on the url to try and find a valid one
    for (int i = 0; i < 2; i++) {
        if (pv == null) {
            return invalidExit;
        }
        try {
            URL urec = asFile ? new URL("file://".concat(pv)) : new URL(pv);
            String urlhost = urec.getHost();
            String returnhost = null;
            if (resolveIP) {
                synchronized (iphost) {
                    // Look the host up first and only lower-case an actual hit:
                    // calling toLowerCase() directly on the lookup result threw
                    // a NullPointerException on every cache miss.
                    String cachedHost = iphost.get(urlhost);
                    if (cachedHost != null) {
                        returnhost = cachedHost.toLowerCase();
                    } else {
                        returnhost = resolveDottedIP(urlhost);
                        iphost.put(urlhost, returnhost);
                        // keep the cache bounded
                        if (iphost.size() > maxhostcache) {
                            iphost.removeEldest();
                        }
                    }
                }
            } else {
                returnhost = urlhost.toLowerCase();
            }
            // store cleaned up (url decoded) version back to packet
            if (clean) {
                if (urec != null && urec.getPath().isEmpty()) {
                    // if the path element is empty, append the slash
                    pv = pv.concat("/");
                }
                field.setValue(bundle, ValueFactory.create(pv));
            }
            if (setHost != null) {
                if (toBaseDomain) {
                    returnhost = NetUtil.getBaseDomain(returnhost);
                } else if (toTopPrivateDomain) {
                    if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                        InternetDomainName domain = InternetDomainName.from(returnhost);
                        if (domain.hasPublicSuffix() && domain.isUnderPublicSuffix()) {
                            InternetDomainName topPrivateDomain = domain.topPrivateDomain();
                            returnhost = topPrivateDomain.toString();
                        }
                    }
                }
                setHost.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setPath != null) {
                setPath.setValue(bundle, ValueFactory.create(urec.getPath()));
            }
            if (setParams != null) {
                setParams.setValue(bundle, ValueFactory.create(urec.getQuery()));
            }
            if (setAnchor != null) {
                setAnchor.setValue(bundle, ValueFactory.create(urec.getRef()));
            }
            if (setHostNormal != null) {
                // returnhost may be null here (see the null check in the
                // toTopPrivateDomain branch), so guard the regex match.
                if (returnhost != null) {
                    Matcher m = hostNormalPattern.matcher(returnhost);
                    if (m.find()) {
                        returnhost = m.group(1);
                    }
                }
                setHostNormal.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setTopPrivateDomain != null) {
                String topDomain = returnhost;
                // Same null guard as in the toTopPrivateDomain branch above.
                if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                    InternetDomainName domainName = InternetDomainName.from(returnhost);
                    if (domainName.isTopPrivateDomain() || domainName.isUnderPublicSuffix()) {
                        topDomain = DOT_JOINER.join(domainName.topPrivateDomain().parts());
                    }
                }
                setTopPrivateDomain.setValue(bundle, ValueFactory.create(topDomain));
            }
        } catch (MalformedURLException e) {
            // Looks url-encoded: decode and let the next pass retry.
            if (pv.indexOf("%3") > 0 && pv.indexOf("%2") > 0) {
                pv = LessBytes.urldecode(pv);
            } else {
                if (debugMalformed) {
                    System.err.println("malformed(" + i + ") " + pv);
                }
                return invalidExit;
            }
        }
    }
    return true;
}