List of usage examples for the method com.google.common.net.InternetDomainName.publicSuffix()
public InternetDomainName publicSuffix()
From source file:org.apache.metron.common.dsl.functions.NetworkFunctions.java
/** * Extract the TLD. If the domain is a normal domain, then we can handle the TLD via the InternetDomainName object. * If it is not, then we default to returning the last segment after the final '.' * @param idn/*from ww w .j av a 2s . co m*/ * @param dn * @return The TLD of the domain */ private static String extractTld(InternetDomainName idn, String dn) { if (idn != null && idn.hasPublicSuffix()) { return idn.publicSuffix().toString(); } else if (dn != null) { StringBuffer tld = new StringBuffer(""); for (int idx = dn.length() - 1; idx >= 0; idx--) { char c = dn.charAt(idx); if (c == '.') { break; } else { tld.append(dn.charAt(idx)); } } return tld.reverse().toString(); } else { return null; } }
From source file:org.apache.metron.stellar.dsl.functions.NetworkFunctions.java
/** * Extract the TLD. If the domain is a normal domain, then we can handle the TLD via the InternetDomainName object. * If it is not, then we default to returning the last segment after the final '.' * @param idn/*from w ww . ja v a 2 s. com*/ * @param dn * @return The TLD of the domain */ private static String extractTld(InternetDomainName idn, String dn) { if (idn != null && idn.hasPublicSuffix()) { String ret = idn.publicSuffix().toString(); if (ret.startsWith("InternetDomainName")) { return Joiner.on(".").join(idn.publicSuffix().parts()); } else { return ret; } } else if (dn != null) { StringBuffer tld = new StringBuffer(""); for (int idx = dn.length() - 1; idx >= 0; idx--) { char c = dn.charAt(idx); if (c == '.') { break; } else { tld.append(dn.charAt(idx)); } } return tld.reverse().toString(); } else { return null; } }
From source file:uk.bl.wa.extract.LinkExtractor.java
public static String extractPublicSuffixFromHost(String host) { if (host == null) return null; // Parse out the public suffix: InternetDomainName domainName; try {//from w w w .ja v a 2 s . c o m domainName = InternetDomainName.from(host); } catch (Exception e) { return null; } InternetDomainName suffix = null; if (host.endsWith(".uk")) { ImmutableList<String> parts = domainName.parts(); if (parts.size() >= 2) { suffix = InternetDomainName.from(parts.get(parts.size() - 2) + "." + parts.get(parts.size() - 1)); } } else { suffix = domainName.publicSuffix(); } // Return a value: if (suffix == null) return null; return suffix.toString(); }
From source file:edu.wisc.ssec.mcidasv.data.PolarOrbitTrackDataSource.java
/** * Create a nice looking name for this instance. * /*from w ww .j a v a 2 s .c o m*/ * <p>Given a URL like * {@code http://celestrak.com/NORAD/elements/weather.txt}, this method * will return {@code celestrak: /NORAD/elements/weather.txt}.</p> * * <p>If the hostname from {@code urlStr} could not be sufficiently reduced, * this method will simply use the entire hostname. A URL like * {@code http://adde.ssec.wisc.edu/weather.txt} will return * {@code adde.ssec.wisc.edu: weather.txt}.</p> * * <p>If there was a problem parsing {@code urlStr}, the method will try * to return the filename. A URL like * {@code http://celestrak.com/NORAD/elements/weather.txt} would return * {@code weather.txt}.</p> * * <p>If all of the above fails, {@code urlStr} will be returned.</p> * * @param urlStr URL of the TLE information. Cannot be {@code null}. * * @return Either the name as described above, or {@code null} if there was * a problem. */ public static String makeNameForRemoteSource(String urlStr) { Objects.requireNonNull(urlStr, "Cannot use a null URL string"); String result; try { URL url = new URL(urlStr); String host = url.getHost(); String path = url.getPath(); // thank you, guava! InternetDomainName domain = InternetDomainName.from(host); // suffix will be something like 'com' or 'co.uk', so suffixStart // needs to start one character earlier to remove the trailing '.' String suffix = domain.publicSuffix().toString(); int suffixStart = host.indexOf(suffix) - 1; String trimmed = host.substring(0, suffixStart); // Trying this with 'http://adde.ssec.wisc.edu/weather.txt' will // result in trimmed being 'adde.ssec.wisc', and I imagine there // are more edge cases. 
With that in mind, we just use the hostname // if it looks like trimmed doesn't look nice if (trimmed.indexOf('.') > -1) { result = host + ": " + path; } else { result = trimmed + ": " + path; } } catch (IllegalArgumentException e) { // InternetDomainName.from() call likely failed; simply return // original URL string as specified by the javadoc! result = urlStr; logger.warn("Problem with URL '" + urlStr + '\'', e); } catch (MalformedURLException e) { logger.error("Bad URL", e); int lastSlash = urlStr.lastIndexOf('/'); if (lastSlash > -1) { // need the "+1" to get rid of the slash result = urlStr.substring(lastSlash + 1); } else { result = urlStr; } } return result; }
From source file:com.jaeksoft.searchlib.analysis.filter.domain.TldTokenFilter.java
@Override public final boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; try {//from w ww. j av a 2 s . c o m URL url = LinkUtils.newEncodedURL(termAtt.toString()); InternetDomainName domainName = InternetDomainName.from(url.getHost()); termAtt.setEmpty(); termAtt.append(domainName.publicSuffix().name()); } catch (MalformedURLException e) { if (silent) return false; throw e; } catch (IllegalArgumentException e) { if (silent) return false; throw e; } catch (URISyntaxException e) { if (silent) return false; throw new IOException(e); } return true; }