Example usage for java.net URL getHost

List of usage examples for java.net URL getHost

Introduction

On this page you can find example usages of java.net.URL.getHost().

Prototype

public String getHost() 

Source Link

Document

Gets the host name of this URL, if applicable.

Usage

From source file:jp.igapyon.selecrawler.SeleCrawlerWebContentAnalyzer.java

/**
 * Resolves an anchor's {@code href} against the URL of the page it was found on.
 *
 * <p>Rules, applied in order:
 * <ul>
 * <li>a blank href or an in-page fragment ({@code #...}) yields {@code null};</li>
 * <li>an href starting with {@code http} is returned unchanged;</li>
 * <li>a protocol-relative href ({@code //host/path}) inherits the page's protocol;</li>
 * <li>a root-relative href ({@code /path}) is appended to the page's protocol, host
 * and explicit port (if any);</li>
 * <li>any other href is appended to the page's directory, i.e. the page URL up to and
 * including its last {@code /}. When the page URL has no path at all
 * (e.g. {@code http://example.com}) a {@code /} is inserted first.</li>
 * </ul>
 * {@code .} and {@code ..} segments are not normalized.
 *
 * @param href the raw href value; must not be {@code null}
 * @param urlString the URL of the page containing the anchor
 * @return the resolved URL string, or {@code null} for blank hrefs and fragments
 * @throws MalformedURLException if {@code urlString} must be parsed (protocol-relative or
 *         root-relative href) and is not a valid URL
 */
public static String adjustAnchorUrl(String href, String urlString) throws MalformedURLException {
    if (href.trim().length() == 0) {
        return null;
    }
    if (href.startsWith("#")) {
        return null;
    }
    if (href.startsWith("http")) {
        return href;
    }

    if (href.startsWith("//")) {
        return new URL(urlString).getProtocol() + ":" + href;
    }

    if (href.startsWith("/")) {
        final URL url = new URL(urlString);
        final StringBuilder origin = new StringBuilder(url.getProtocol()).append("://").append(url.getHost());
        // Keep an explicit port; dropping it would point the link at a different server.
        if (url.getPort() != -1) {
            origin.append(':').append(url.getPort());
        }
        return origin.append(href).toString();
    }

    if (urlString.endsWith("/")) {
        return urlString + href;
    }

    final int schemeSeparator = urlString.indexOf("://");
    final int lastSlash = urlString.lastIndexOf('/');
    if (schemeSeparator >= 0 && lastSlash < schemeSeparator + 3) {
        // The only slashes belong to "://": the page URL has no path, so cutting at the
        // last slash would discard the host. Append below the root instead.
        return urlString + "/" + href;
    }
    return urlString.substring(0, lastSlash + 1) + href;
}

From source file:com.digitalpebble.storm.crawler.protocol.http.HttpRobotRulesParser.java

/**
 * Builds the key under which robot rules for the given URL are stored in and
 * read from the cache.
 *
 * <p>The key has the form {@code protocol:host:port}. Protocol and host are
 * lower-cased, and the protocol's default port is used when the URL does not
 * carry an explicit one. Robot rules apply only to the host, protocol and port
 * where robots.txt is hosted (cf. NUTCH-1752), so nothing else goes into the key.
 *
 * @param url the URL to derive the key from
 * @return the cache key
 */
protected static String getCacheKey(URL url) {
    final String scheme = url.getProtocol().toLowerCase(Locale.ROOT);
    final String hostName = url.getHost().toLowerCase(Locale.ROOT);
    final int explicitPort = url.getPort();
    final int effectivePort = (explicitPort != -1) ? explicitPort : url.getDefaultPort();
    return String.join(":", scheme, hostName, String.valueOf(effectivePort));
}

From source file:edu.kit.dama.staging.entities.AdalapiProtocolConfiguration.java

/**
 * Get the unique protocol identifier for the provided Url. The identifier
 * is generated using the schema:
 *
 * protocol[@host][:port]
 *
 * Valid identifiers according to this schema are e.g. http@myHost;
 * http@myHost:8080; ftp@anotherHost; file
 *
 * As there is no host/port information for Urls accessed via file protocol,
 * there is only one valid identifier for file Urls.
 *
 * @param pUrl A sample URL (protocol and authority are sufficient, e.g.
 * http://remoteHost:8080) as it should be accessed by the provided protocol
 * implementation.
 *
 * @return The identifier string.
 *
 * @throws IllegalArgumentException if the Url reports no protocol.
 */
public final static String getProtocolIdentifier(URL pUrl) {
    String protocol = pUrl.getProtocol();
    if (protocol == null) {
        throw new IllegalArgumentException("The provided Url " + pUrl + " has no protocol specified.");
    }

    String host = pUrl.getHost();
    // URL#getHost() reports a missing host as an empty string, so both null and
    // empty must be treated as "no host" to get the bare "file" identifier.
    if (host == null || host.isEmpty()) {
        return protocol;
    }

    int port = pUrl.getPort();
    return protocol + "@" + host + ((port > -1) ? ":" + port : "");
}

From source file:eu.fthevenet.binjr.sources.jrds.adapters.JrdsDataAdapter.java

/**
 * Creates a {@link JrdsDataAdapter} for the JRDS webapp at the given address.
 *
 * <p>An address without a URI scheme is prefixed with {@code http://}, and a single
 * trailing slash is removed before the address is parsed.
 *
 * @param address     the URL to the JRDS webapp.
 * @param zoneId      the id of the time zone used to record dates.
 * @param treeViewTab passed through to the {@link JrdsDataAdapter} constructor.
 * @param filter      passed through to the {@link JrdsDataAdapter} constructor.
 * @return a new instance of the {@link JrdsDataAdapter} class.
 * @throws DataAdapterException if the address cannot be parsed as a URL or has no host.
 */
public static JrdsDataAdapter fromUrl(String address, ZoneId zoneId, JrdsTreeViewTab treeViewTab, String filter)
        throws DataAdapterException {
    try {
        // No scheme in the address: fall back to plain http.
        final boolean hasScheme = uriSchemePattern.matcher(address).find();
        final String withScheme = hasScheme ? address : "http://" + address;
        final URL url = new URL(withScheme.replaceAll("/$", ""));
        final String host = url.getHost().trim();
        if (host.isEmpty()) {
            throw new CannotInitializeDataAdapterException("Malformed URL: no host");
        }
        return new JrdsDataAdapter(url, zoneId, "utf-8", treeViewTab, filter);
    } catch (MalformedURLException e) {
        throw new CannotInitializeDataAdapterException("Malformed URL: " + e.getMessage(), e);
    }
}

From source file:org.apache.kylin.engine.mr.common.HadoopStatusGetter.java

/**
 * Tells whether {@code value} is a non-empty string that parses as a URL
 * with both a non-empty protocol and a non-empty host.
 *
 * @param value the candidate URL string, may be {@code null}
 * @return {@code true} if the value parses and has protocol and host, {@code false} otherwise
 */
private static boolean isValidURL(String value) {
    if (!StringUtils.isNotEmpty(value)) {
        return false;
    }

    final java.net.URL parsed;
    try {
        parsed = new java.net.URL(value);
    } catch (MalformedURLException ignored) {
        // Unparseable input is simply "not a valid URL" for this check.
        return false;
    }

    return StringUtils.isNotEmpty(parsed.getProtocol()) && StringUtils.isNotEmpty(parsed.getHost());
}

From source file:HttpTransactionUtils.java

/**
 * Renders the base of a URL as {@code protocol://server[:port][/file-specification]}.
 *
 * <p>The port is included only when the URL specifies one explicitly. The file part
 * (as reported by {@link URL#getFile()}) is appended only on request.
 *
 * @param url
 *          URL to format
 * @param preserveFile
 *          Keep the /directory/filename portion of the URL?
 * @return URL string
 * @throws MalformedURLException
 *           declared in the signature; the method body itself does not raise it
 */
public static String formatUrl(URL url, boolean preserveFile) throws MalformedURLException {
    final StringBuilder formatted = new StringBuilder(url.getProtocol())
            .append("://")
            .append(url.getHost());

    final int port = url.getPort();
    if (port != -1) {
        formatted.append(":").append(port);
    }

    final String file = preserveFile ? url.getFile() : null;
    if (file != null) {
        formatted.append(file);
    }
    return formatted.toString();
}

From source file:com.icloud.framework.http.URLUtil.java

/**
 * Returns the {@link DomainSuffix} corresponding to the last public part of
 * the hostname.
 *
 * <p>Leading labels are stripped from the host one at a time, and the first
 * remainder known to {@link DomainSuffixes} is returned. A single trailing dot
 * on the host is ignored, matching {@code getDomainName} and {@code isIPPattern}.
 *
 * @param url the URL whose host is examined
 * @return the matching suffix, or {@code null} if the host matches
 *         {@code IP_PATTERN} or no known suffix is found
 */
public static DomainSuffix getDomainSuffix(URL url) {
    DomainSuffixes tlds = DomainSuffixes.getInstance();
    String host = url.getHost();
    // Hosts can come back with a trailing dot; without stripping it, every
    // candidate would end in "." and the suffix lookup could not match.
    if (host.endsWith(".")) {
        host = host.substring(0, host.length() - 1);
    }
    if (IP_PATTERN.matcher(host).matches()) {
        return null;
    }

    int index = 0;
    String candidate = host;
    while (index >= 0) {
        index = candidate.indexOf('.');
        // With no dot left, index is -1 and the whole candidate is looked up once more.
        String subCandidate = candidate.substring(index + 1);
        DomainSuffix d = tlds.get(subCandidate);
        if (d != null) {
            return d;
        }
        candidate = subCandidate;
    }
    return null;
}

From source file:com.icloud.framework.http.URLUtil.java

/**
 * Tells whether the host of the given URL matches {@code IP_PATTERN}.
 * A single trailing dot on the host is dropped before matching.
 *
 * @param url the URL whose host is examined
 * @return {@code true} if the host matches {@code IP_PATTERN}
 */
public static boolean isIPPattern(URL url) {
    final String rawHost = url.getHost();
    // Hosts can come back with a trailing dot; remove it so the pattern can match.
    final String host = rawHost.endsWith(".") ? rawHost.substring(0, rawHost.length() - 1) : rawHost;
    return IP_PATTERN.matcher(host).matches();
}

From source file:com.icloud.framework.http.URLUtil.java

/**
 * Returns the domain name of the url. The domain name of a url is the
 * substring of the url's hostname, w/o subdomain names. As an example <br>
 * <code>
 *  getDomainName(new URL("http://lucene.apache.org/"))
 *  </code><br>
 * will return <br>
 * <code> apache.org</code>
 *
 * <p>A host matching {@code IP_PATTERN} is returned as is (minus a single
 * trailing dot). If no known domain suffix is found, the last label of the
 * host is returned.
 *
 * @param url the URL whose host is examined
 * @return the domain name derived from the host
 */
public static String getDomainName(URL url) {
    final DomainSuffixes suffixes = DomainSuffixes.getInstance();

    String hostName = url.getHost();
    // Hosts can come back with a trailing dot; strip it before any matching.
    if (hostName.endsWith(".")) {
        hostName = hostName.substring(0, hostName.length() - 1);
    }
    if (IP_PATTERN.matcher(hostName).matches()) {
        return hostName;
    }

    String current = hostName;
    int dot;
    do {
        dot = current.indexOf('.');
        // Everything after the first dot; the whole string when there is no dot.
        final String remainder = current.substring(dot + 1);
        if (suffixes.isDomainSuffix(remainder)) {
            return current;
        }
        current = remainder;
    } while (dot >= 0);
    return current;
}

From source file:com.zimbra.cs.servlet.util.AuthUtil.java

/**
 * Computes the URL a request should be redirected to.
 *
 * <p>The base URL comes from {@code getAdminURL} for admin requests and from
 * {@code getMailURL} otherwise. For absolute (non-relative) URLs, the host is
 * replaced by the server name the request was sent to whenever the two differ
 * (compared case-insensitively); protocol, port and file are kept.
 *
 * @param req            the incoming request; its server name is used as the target host
 * @param server         the server passed to {@code getAdminURL} / {@code getMailURL}
 * @param isAdminRequest whether the admin URL rather than the mail URL is wanted
 * @param relative       whether a relative URL is wanted; relative URLs are returned untouched
 * @return the redirect URL
 * @throws ServiceException      propagated from {@code getAdminURL} / {@code getMailURL}
 * @throws MalformedURLException if the absolute base URL cannot be parsed or rebuilt
 */
public static String getRedirectURL(HttpServletRequest req, Server server, boolean isAdminRequest,
        boolean relative) throws ServiceException, MalformedURLException {
    final String baseUrl = isAdminRequest ? getAdminURL(server, relative) : getMailURL(server, relative);
    if (relative) {
        return baseUrl;
    }

    final URL parsed = new URL(baseUrl);
    final String requestedHost = req.getServerName();
    if (requestedHost.equalsIgnoreCase(parsed.getHost())) {
        return baseUrl;
    }

    // Point the redirect at the host the request was actually sent to.
    final URL rewritten = new URL(parsed.getProtocol(), requestedHost, parsed.getPort(), parsed.getFile());
    return rewritten.toString();
}