Example usage for com.google.common.net InternetDomainName isValid

List of usage examples for com.google.common.net InternetDomainName isValid

Introduction

This page collects example usages of com.google.common.net InternetDomainName isValid.

Prototype

public static boolean isValid(String name) 

Source Link

Document

Indicates whether the argument is a syntactically valid domain name using lenient validation.

Usage

From source file:org.elasticsearch.plugin.readonlyrest.acl.blocks.rules.impl.XForwardedForSyncRule.java

/**
 * Builds the rule from the address list stored in the settings under {@code getKey()}.
 *
 * <p>Null or empty entries are skipped. Every other entry is trimmed and must then either be
 * accepted by {@code IPMask.getIPMask} or be a syntactically valid domain name; the trimmed
 * value is what ends up in {@code allowedAddresses}.
 *
 * @param s the settings to read the address array from
 * @throws RuleNotConfiguredException if the setting is missing or has no elements
 * @throws RuleConfigurationError if an entry is neither a parsable IP mask nor a valid domain name
 */
public XForwardedForSyncRule(Settings s) throws RuleNotConfiguredException {
    super();
    String[] configured = s.getAsArray(getKey());
    if (configured == null || configured.length == 0) {
        throw new RuleNotConfiguredException();
    }
    allowedAddresses = Lists.newArrayList();
    for (String entry : configured) {
        if (Strings.isNullOrEmpty(entry)) {
            continue;
        }
        // Validate exactly the value that gets stored: checking the untrimmed entry would
        // reject " 10.0.0.1 " even though the trimmed form is what the rule keeps.
        String address = entry.trim();
        try {
            IPMask.getIPMask(address);
        } catch (Exception e) {
            // Not an IP mask; a host name is still acceptable.
            if (!InternetDomainName.isValid(address)) {
                throw new RuleConfigurationError("invalid address", e);
            }
        }
        allowedAddresses.add(address);
    }
}

From source file:org.elasticsearch.plugin.readonlyrest.acl.blocks.rules.impl.HostsSyncRule.java

/**
 * Builds the rule from the {@code hosts} setting and the
 * {@code accept_x-forwarded-for_header} flag (default {@code false}).
 *
 * <p>Null or empty entries are skipped. Every other entry is trimmed and must then either be
 * accepted by {@code IPMask.getIPMask} or be a syntactically valid domain name; the trimmed
 * value is what ends up in {@code allowedAddresses}.
 *
 * @param s the settings to read from
 * @throws RuleNotConfiguredException if {@code hosts} is missing or has no elements
 * @throws RuleConfigurationError if an entry is neither a parsable IP mask nor a valid domain name
 */
public HostsSyncRule(Settings s) throws RuleNotConfiguredException {
    super();
    acceptXForwardedForHeader = s.getAsBoolean("accept_x-forwarded-for_header", false);
    String[] configured = s.getAsArray("hosts");
    if (configured == null || configured.length == 0) {
        throw new RuleNotConfiguredException();
    }
    allowedAddresses = Lists.newArrayList();
    for (String entry : configured) {
        if (Strings.isNullOrEmpty(entry)) {
            continue;
        }
        // Validate exactly the value that gets stored: checking the untrimmed entry would
        // reject " example.com " even though the trimmed form is what the rule keeps.
        String address = entry.trim();
        try {
            IPMask.getIPMask(address);
        } catch (Exception e) {
            // Not an IP mask; a host name is still acceptable.
            if (!InternetDomainName.isValid(address)) {
                throw new RuleConfigurationError("invalid address", e);
            }
        }
        allowedAddresses.add(address);
    }
}

From source file:edu.uci.ics.crawler4j.url.WebURL.java

/**
 * Stores {@code url} and derives {@code registeredDomain}, {@code subDomain} and {@code path}
 * from it.
 *
 * <p>The authority is the text between the first {@code "//"} and the next {@code '/'} (or the
 * end of the string). When a TLD list is available and the authority is a valid domain name,
 * its labels are accumulated right-to-left until the list reports a registered domain; the
 * labels left of that become the subdomain. Otherwise the whole authority is used as the
 * registered domain and the subdomain is empty. The path is everything after the authority,
 * cut at the first {@code '?'}.
 *
 * @param url the URL string to store and split
 */
public void setURL(String url) {
    this.url = url;

    int authorityStart = url.indexOf("//") + 2;
    int firstSlash = url.indexOf('/', authorityStart);
    int authorityEnd = (firstSlash > authorityStart) ? firstSlash : url.length();
    String authority = url.substring(authorityStart, authorityEnd);

    // Defaults, used when no registered domain can be determined.
    registeredDomain = authority;
    subDomain = "";

    if (tldList != null && !authority.isEmpty() && InternetDomainName.isValid(authority)) {
        String[] labels = authority.split("\\.");
        String suffix = null;
        String registered = null;
        String prefix = null;
        for (int idx = labels.length - 1; idx >= 0; idx--) {
            String label = labels[idx];
            if (registered != null) {
                // Registered domain already found: remaining labels belong to the subdomain.
                prefix = (prefix == null) ? label : label + "." + prefix;
                continue;
            }
            suffix = (suffix == null) ? label : label + "." + suffix;
            if (tldList.isRegisteredDomain(suffix)) {
                registered = suffix;
            }
        }
        if (registered != null) {
            registeredDomain = registered;
        }
        if (prefix != null) {
            subDomain = prefix;
        }
    }

    String remainder = url.substring(authorityEnd);
    int queryStart = remainder.indexOf('?');
    path = (queryStart >= 0) ? remainder.substring(0, queryStart) : remainder;
}

From source file:org.archive.crawler.prefetch.HostQuotaEnforcer.java

/**
 * Decides whether {@code curi} falls under the configured {@code host}.
 *
 * <p>When subdomain matching is enabled and both host names are valid domain names, the URI's
 * host matches if it equals the configured host or has it as an ancestor domain. In every
 * other case the two server-cache entries are compared by reference.
 *
 * @param curi the URI being considered
 * @return {@code true} if the URI's host matches the configured host
 */
@Override
protected boolean shouldProcess(CrawlURI curi) {
    String uriHostname = serverCache.getHostFor(curi.getUURI()).getHostName();

    boolean compareAsDomains = getApplyToSubdomains() && InternetDomainName.isValid(host)
            && InternetDomainName.isValid(uriHostname);
    if (!compareAsDomains) {
        return serverCache.getHostFor(curi.getUURI()) == serverCache.getHostFor(host);
    }

    InternetDomainName target = InternetDomainName.from(host);
    InternetDomainName current = InternetDomainName.from(uriHostname);
    // Climb from the URI's host towards the root until the configured host is reached.
    while (!current.equals(target)) {
        if (!current.hasParent()) {
            return false;
        }
        current = current.parent();
    }
    return true;
}

From source file:org.archive.modules.fetcher.BdbCookieStore.java

/**
 * Creates a {@link LimitedCookieStoreFacade} scoped to {@code host}: its
 * {@link LimitedCookieStoreFacade#getCookies()} sees the cookies stored for the host and, when
 * the host is a valid domain name, for each of its parent domains as well.
 *
 * @param host the host name to build the view for
 * @return a cookie store backed by the matching cookie subsets
 */
public CookieStore cookieStoreFor(String host) {
    CompositeCollection matching = new CompositeCollection();

    if (!InternetDomainName.isValid(host)) {
        // Not a domain name, so there is no parent chain to follow.
        Collection<Cookie> hostOnly = hostSubset(host);
        matching.addComposited(hostOnly);
    } else {
        for (InternetDomainName current = InternetDomainName.from(host); current != null;
                current = current.hasParent() ? current.parent() : null) {
            Collection<Cookie> forDomain = hostSubset(current.toString());
            matching.addComposited(forDomain);
        }
    }

    @SuppressWarnings("unchecked")
    List<Cookie> cookies = new RestrictedCollectionWrappedList<Cookie>(matching);
    return new LimitedCookieStoreFacade(cookies);
}

From source file:com.qwazr.crawler.web.manager.WebCrawlThread.java

/**
 * Tells whether the host of {@code uri} is the same domain as {@code internetDomainName}.
 *
 * @param uri the URI whose host is checked
 * @return {@code false} if the host is empty or not a valid domain name, otherwise the result
 *     of comparing the parsed host with {@code internetDomainName}
 */
private boolean matchesInitialDomain(URI uri) {
    String host = uri.getHost();
    boolean parsable = !StringUtils.isEmpty(host) && InternetDomainName.isValid(host);
    return parsable && internetDomainName.equals(InternetDomainName.from(host));
}

From source file:com.addthis.hydra.data.filter.bundle.BundleFilterURL.java

/**
 * Parses the URL held in {@code field} and copies its parts into the configured output fields
 * (host, path, query, anchor, normalized host, top private domain).
 *
 * <p>Unless {@code asFile} is set, values shorter than 7 characters are rejected, and values
 * that do not start with {@code http} are either prefixed with {@code http://} (when
 * {@code fixProto} is set) or rejected. A value that fails to parse is URL-decoded and retried
 * if it contains both {@code %3} and {@code %2}.
 *
 * @param bundle the bundle to read the URL from and write the extracted parts to
 * @return {@code invalidExit} when the value is missing, too short, lacks a protocol that may
 *     not be fixed, or is malformed and cannot be decoded further; {@code true} otherwise
 */
@Override
public boolean filter(Bundle bundle) {
    String pv = ValueUtil.asNativeString(field.getValue(bundle));
    if (!asFile) {
        if (pv == null || pv.length() < 7) {
            return invalidExit;
        }
        String lpv = pv.trim().toLowerCase();
        if (!(lpv.startsWith("http"))) {
            if (fixProto) {
                if (clean && lpv.indexOf("%2f") >= 0) {
                    pv = LessBytes.urldecode(pv);
                }
                pv = "http://".concat(pv);
            } else {
                return invalidExit;
            }
        }
        if (clean && (lpv.startsWith("http%") || lpv.startsWith("https%"))) {
            pv = LessBytes.urldecode(pv);
        }
    }
    // up to two 'decoding' passes on the url to try and find a valid one
    // NOTE(review): a pass that parses successfully does not leave the loop, so the same work
    // is repeated on the second iteration -- confirm whether that is intended.
    for (int i = 0; i < 2; i++) {
        if (pv == null) {
            return invalidExit;
        }
        try {
            URL urec = asFile ? new URL("file://".concat(pv)) : new URL(pv);
            String urlhost = urec.getHost();
            String returnhost = null;
            if (resolveIP) {
                synchronized (iphost) {
                    // Look the host up first and only dereference a hit; calling
                    // toLowerCase() on the raw lookup threw NullPointerException on every
                    // cache miss, making the resolve branch below unreachable.
                    String cachedHost = iphost.get(urlhost);
                    if (cachedHost == null) {
                        returnhost = resolveDottedIP(urlhost);
                        iphost.put(urlhost, returnhost);
                        if (iphost.size() > maxhostcache) {
                            iphost.removeEldest();
                        }
                    } else {
                        returnhost = cachedHost.toLowerCase();
                    }
                }
            } else {
                returnhost = urlhost.toLowerCase();
            }
            // store cleaned up (url decoded) version back to packet
            if (clean) {
                if (urec.getPath().isEmpty()) {
                    // if the path element is empty, append the slash
                    pv = pv.concat("/");
                }
                field.setValue(bundle, ValueFactory.create(pv));
            }
            if (setHost != null) {
                if (toBaseDomain) {
                    returnhost = NetUtil.getBaseDomain(returnhost);
                } else if (toTopPrivateDomain) {
                    if (returnhost != null && InternetDomainName.isValid(returnhost)) {
                        InternetDomainName domain = InternetDomainName.from(returnhost);
                        if (domain.hasPublicSuffix() && domain.isUnderPublicSuffix()) {
                            InternetDomainName topPrivateDomain = domain.topPrivateDomain();
                            returnhost = topPrivateDomain.toString();
                        }
                    }
                }
                setHost.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setPath != null) {
                setPath.setValue(bundle, ValueFactory.create(urec.getPath()));
            }
            if (setParams != null) {
                setParams.setValue(bundle, ValueFactory.create(urec.getQuery()));
            }
            if (setAnchor != null) {
                setAnchor.setValue(bundle, ValueFactory.create(urec.getRef()));
            }
            if (setHostNormal != null) {
                // Keep only the first capture group of the normalization pattern when it matches.
                Matcher m = hostNormalPattern.matcher(returnhost);
                if (m.find()) {
                    returnhost = m.group(1);
                }
                setHostNormal.setValue(bundle, ValueFactory.create(returnhost));
            }
            if (setTopPrivateDomain != null) {
                // Falls back to the (possibly normalized) host when no top private domain applies.
                String topDomain = returnhost;
                if (InternetDomainName.isValid(returnhost)) {
                    InternetDomainName domainName = InternetDomainName.from(returnhost);
                    if (domainName.isTopPrivateDomain() || domainName.isUnderPublicSuffix()) {
                        topDomain = DOT_JOINER.join(domainName.topPrivateDomain().parts());
                    }
                }
                setTopPrivateDomain.setValue(bundle, ValueFactory.create(topDomain));
            }
        } catch (MalformedURLException e) {
            if (pv.indexOf("%3") > 0 && pv.indexOf("%2") > 0) {
                // Looks percent-encoded: decode once more and let the next pass retry.
                pv = LessBytes.urldecode(pv);
            } else {
                if (debugMalformed) {
                    System.err.println("malformed(" + i + ") " + pv);
                }
                return invalidExit;
            }
        }
    }
    return true;
}

From source file:google.registry.xml.XmlTestUtils.java

/**
 * Deeply explore the object and normalize values so that things we consider equal compare so.
 * The return value consists of two parts: the updated key and the value. The value is
 * straightforward enough: it is the rendering of the subtree to be attached at the current point.
 * The key is more complicated, because of namespaces. When an XML element specifies namespaces
 * using xmlns attributes, those namespaces apply to the element as well as all of its
 * descendants. That means that, when prefixing the element name with the full namespace path,
 * as required to do proper comparison, the element name depends on its children. When looping
 * through a JSONObject map, we can't just recursively generate the value and store it using the
 * key. We may have to update the key as well, to get the namespaces correct. A returned key of
 * null indicates that we should use the existing key. A non-null key indicates that we should
 * replace the existing key.
 *
 * <p>Summary of the value produced per input type, as implemented below: a JSONObject becomes a
 * map (or "" when empty), a JSONArray becomes a set of its normalized elements, numbers become
 * their string form, booleans and the strings "true"/"false" become "1"/"0", strings that parse
 * as ISO date-times become UTC date-time objects, strings that are not valid domain names but
 * parse as IP addresses become address objects, and everything else is returned as is.
 *
 * @param elementName the name under which the current subtree was found, or null if the current
 *     subtree's name is nonexistent or irrelevant
 * @param obj the current subtree
 * @param path the (non-namespaced) element path used for ignoredPaths purposes
 * @param ignoredPaths the set of paths whose values should be set to IGNORED
 * @param nsMap the inherited namespace identifier-to-URI map
 * @return the key under which the rendered subtree should be stored (or null), and the rendered
 *     subtree
 */
private static Map.Entry<String, Object> normalize(@Nullable String elementName, Object obj,
        @Nullable String path, Set<String> ignoredPaths, Map<String, String> nsMap) throws Exception {
    if (obj instanceof JSONObject) {
        JSONObject jsonObject = (JSONObject) obj;
        Map<String, Object> map = new HashMap<>();
        String[] names = JSONObject.getNames(jsonObject);
        if (names != null) {
            // Separate all elements and keys into namespace specifications, which we must process
            // first, and everything else.
            ImmutableList.Builder<String> namespacesBuilder = new ImmutableList.Builder<>();
            ImmutableList.Builder<String> othersBuilder = new ImmutableList.Builder<>();
            for (String key : names) {
                (key.startsWith("xmlns") ? namespacesBuilder : othersBuilder).add(key);
            }
            // First, handle all namespace specifications, updating our ns-to-URI map. Use a HashMap
            // rather than an ImmutableMap.Builder so that we can override existing map entries.
            HashMap<String, String> newNsMap = new HashMap<>();
            newNsMap.putAll(nsMap);
            for (String key : namespacesBuilder.build()) {
                // Parse the attribute name, of the form xmlns:nsid, and extract the namespace identifier.
                // If there's no colon, we are setting the default namespace.
                List<String> components = Splitter.on(':').splitToList(key);
                String ns = (components.size() >= 2) ? components.get(1) : "";
                newNsMap.put(ns, jsonObject.get(key).toString());
            }
            // From here on the parameter refers to the merged map, which is also what gets passed
            // down to the recursive calls for this object's children.
            nsMap = ImmutableMap.copyOf(newNsMap);
            // Now, handle the non-namespace items, recursively transforming the map and mapping all
            // namespaces to the full URI for proper comparison.
            for (String key : othersBuilder.build()) {
                // The ignoredPaths lookup uses the key without its namespace prefix.
                String simpleKey = Iterables.getLast(Splitter.on(':').split(key));
                String newPath = (path == null) ? simpleKey : (path + "." + simpleKey);
                String mappedKey;
                Object value;
                if (ignoredPaths.contains(newPath)) {
                    mappedKey = null;
                    // Set ignored fields to a value that will compare equal.
                    value = "IGNORED";
                } else {
                    Map.Entry<String, Object> simpleEntry = normalize(key, jsonObject.get(key), newPath,
                            ignoredPaths, nsMap);
                    mappedKey = simpleEntry.getKey();
                    value = simpleEntry.getValue();
                }
                if (mappedKey == null) {
                    // Note that this does not follow the XML rules exactly. I read somewhere that attribute
                    // names, unlike element names, never use the default namespace. But after
                    // JSONification, we cannot distinguish between attributes and child elements, so we
                    // apply the default namespace to everything. Hopefully that will not cause a problem.
                    mappedKey = key.equals("content") ? key : mapName(key, nsMap, true);
                }
                map.put(mappedKey, value);
            }
        }
        // Map the namespace of the element name of the map we are normalizing.
        elementName = mapName(elementName, nsMap, true);
        // If a node has both text content and attributes, the text content will end up under a key
        // called "content". If that's the only thing left (which will only happen if there was an
        // "xmlns:*" key that we removed), treat the node as just text and recurse.
        if (map.size() == 1 && map.containsKey("content")) {
            return new AbstractMap.SimpleEntry<>(elementName,
                    normalize(null, jsonObject.get("content"), path, ignoredPaths, nsMap).getValue());
        }
        // The conversion to JSON converts <a/> into "" and the semantically equivalent <a></a> into
        // an empty map, so normalize that here.
        return new AbstractMap.SimpleEntry<>(elementName, map.isEmpty() ? "" : map);
    }
    if (obj instanceof JSONArray) {
        // Another problem resulting from JSONification: If the array contains elements whose names
        // are the same before URI expansion, but different after URI expansion, because they use
        // xmlns attribute that define the namespaces differently, we will screw up. Again, hopefully
        // that doesn't happen much. The reverse is also true: If the array contains names that are
        // different before URI expansion, but the same after, we may have a problem, because the
        // elements will wind up in different JSONArrays as a result of JSONification. We wave our
        // hands and just assume that the URI expansion of the first element holds for all others.
        // The elements are collected into a HashSet, so their order and any duplicates are not
        // preserved in the normalized value.
        Set<Object> set = new HashSet<>();
        String mappedKey = null;
        for (int i = 0; i < ((JSONArray) obj).length(); ++i) {
            Map.Entry<String, Object> simpleEntry = normalize(null, ((JSONArray) obj).get(i), path,
                    ignoredPaths, nsMap);
            if (i == 0) {
                mappedKey = simpleEntry.getKey();
            }
            set.add(simpleEntry.getValue());
        }
        return new AbstractMap.SimpleEntry<String, Object>(mappedKey, set);
    }
    // Scalars never rename their key, hence the null key in every entry returned below.
    if (obj instanceof Number) {
        return new AbstractMap.SimpleEntry<String, Object>(null, obj.toString());
    }
    if (obj instanceof Boolean) {
        return new AbstractMap.SimpleEntry<String, Object>(null, ((Boolean) obj) ? "1" : "0");
    }
    if (obj instanceof String) {
        // Turn stringified booleans into integers. Both are acceptable as xml boolean values, but
        // we use "true" and "false" whereas the samples use "1" and "0".
        if (obj.equals("true")) {
            return new AbstractMap.SimpleEntry<String, Object>(null, "1");
        }
        if (obj.equals("false")) {
            return new AbstractMap.SimpleEntry<String, Object>(null, "0");
        }
        String string = obj.toString();
        // We use a slightly different datetime format (both legal) than the samples, so normalize
        // both into Datetime objects.
        try {
            return new AbstractMap.SimpleEntry<String, Object>(null,
                    ISODateTimeFormat.dateTime().parseDateTime(string).toDateTime(UTC));
        } catch (IllegalArgumentException e) {
            // It wasn't a DateTime.
        }
        try {
            return new AbstractMap.SimpleEntry<String, Object>(null,
                    ISODateTimeFormat.dateTimeNoMillis().parseDateTime(string).toDateTime(UTC));
        } catch (IllegalArgumentException e) {
            // It wasn't a DateTime.
        }
        try {
            // Only strings that are NOT valid domain names are tried as IP addresses; a string
            // that passes the domain-name check is returned as plain text below.
            if (!InternetDomainName.isValid(string)) {
                // It's not a domain name, but it is an InetAddress. Ergo, it's an ip address.
                return new AbstractMap.SimpleEntry<String, Object>(null, InetAddresses.forString(string));
            }
        } catch (IllegalArgumentException e) {
            // Not an ip address.
        }
        return new AbstractMap.SimpleEntry<String, Object>(null, string);
    }
    // Any other type is passed through unchanged after the checkNotNull call.
    return new AbstractMap.SimpleEntry<>(null, checkNotNull(obj));
}

From source file:org.archive.modules.fetcher.FetchWhois.java

/**
 * Queues whois lookups for the host behind {@code curi}: one for its IP address when the
 * cached host has one, and one for its domain when the host name is a valid domain name.
 *
 * <p>The domain lookup targets the top private domain; if that cannot be determined, the
 * plain host name is used instead and a warning is logged.
 *
 * @param curi the URI whose host the whois links are derived from
 */
protected void addWhoisLinks(CrawlURI curi) throws InterruptedException {
    CrawlHost crawlHost = serverCache.getHostFor(curi.getUURI());
    if (crawlHost == null) {
        return;
    }

    // whois lookup on the ip address, when one is known
    if (crawlHost.getIP() != null) {
        addWhoisLink(curi, crawlHost.getIP().getHostAddress());
    }

    if (!InternetDomainName.isValid(crawlHost.getHostName())) {
        return;
    }

    // whois lookup on the domain
    try {
        InternetDomainName hostDomain = InternetDomainName.from(crawlHost.getHostName());
        addWhoisLink(curi, hostDomain.topPrivateDomain().toString());
    } catch (IllegalStateException e) {
        // e.g. java.lang.IllegalStateException: Not under a public suffix: mod.uk
        String hostNameForLog = crawlHost.getHostName();
        logger.warning("problem resolving topmost assigned domain, will try whois lookup on the plain hostname "
                + hostNameForLog + " - " + e);
        addWhoisLink(curi, crawlHost.getHostName());
    }
}

From source file:com.spend.spendService.WorkerSearchQueue.java

/**
 * Inserts one search result into the {@code seedurlraw} table, provided the URL's host is a
 * valid domain name or host specifier.
 *
 * <p>This is best-effort: a malformed URL or a database failure is logged and otherwise
 * ignored, so the method never throws.
 *
 * @param url the result URL to store
 * @param searchEngineName name of the search engine that produced the result
 * @param text result text; currently not persisted
 * @param resultOrder position of the result, stored through {@code setString}
 * @param pageContentId id of the page content the result belongs to
 */
private void insertSeedLink(String url, String searchEngineName, String text, int resultOrder,
        int pageContentId) {
    try {
        String host = new URL(url).getHost();
        if (!InternetDomainName.isValid(host) && !com.google.common.net.HostSpecifier.isValid(host)) {
            return;
        }
        // try-with-resources closes the statement even when a setter or the update throws.
        try (PreparedStatement pstmt = con.prepareStatement(
                "INSERT INTO seedurlraw (url, searchEngine,resultOrder,pageContentId) VALUES (?,?,?,?);")) {
            pstmt.setString(1, url);
            pstmt.setString(2, searchEngineName);
            pstmt.setString(3, String.valueOf(resultOrder));
            pstmt.setInt(4, pageContentId);
            pstmt.executeUpdate();
        }
    } catch (Exception ex) {
        // Still best-effort, but leave a trace instead of discarding the failure silently.
        java.util.logging.Logger.getLogger(getClass().getName()).log(
                java.util.logging.Level.WARNING, "Failed to insert seed link: " + url, ex);
    }
}