Java URI Normalize normalizeURI(String uri)

Here you can find the source of normalizeURI(String uri)

Description

Normalizes a URI as specified in section 6.2.2 of RFC 3986

License

Apache License

Parameter

Parameter Description
uri a URI

Exception

Exception Description
URISyntaxException if the given string is not a syntactically valid URI
UnsupportedEncodingException if the UTF-8 encoding is unavailable (not expected on a standard JVM)

Return

an RFC 3986 URI normalized according to section 6.2.2.

Declaration

public static URI normalizeURI(String uri) throws URISyntaxException,
        UnsupportedEncodingException 

Method Source Code

//package com.java2s;
/* Copyright 2014 MITRE Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;

/**
 * URI normalization helpers based on section 6.2.2 of RFC 3986.
 */
public class Main {
    /** Upper-case hexadecimal digits used when re-emitting percent-escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Parses the given string as a URI and normalizes it as specified in
     * section 6.2.2 of RFC 3986.
     *
     * @param uri the string form of a URI or relative reference
     * @return the normalized URI
     * @throws URISyntaxException if {@code uri} is not a syntactically valid URI
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         retained in the signature for source compatibility with callers
     */
    public static URI normalizeURI(String uri) throws URISyntaxException,
            UnsupportedEncodingException {
        return normalizeURI(new URI(uri));
    }

    /**
     * Normalizes a URI as specified in section 6.2.2 of RFC 3986.
     * <p>
     * Opaque URIs (such as URNs and {@code mailto:foo@bar.com}) are returned
     * unchanged. For hierarchical URIs and relative references this method:
     * <ul>
     * <li>percent-encodes non-ASCII characters (via {@link URI#toASCIIString()});</li>
     * <li>lower-cases the scheme and host;</li>
     * <li>upper-cases the hex digits of percent-escapes and decodes escapes of
     *     unreserved characters (letters, digits, {@code - . _ ~});</li>
     * <li>drops the port when it is the scheme's default;</li>
     * <li>replaces an empty path with {@code "/"} when an authority is present;</li>
     * <li>sorts query parameters by name, ignoring case, keeping repeated
     *     parameters in their original relative order;</li>
     * <li>removes dot segments via {@link URI#normalize()}.</li>
     * </ul>
     * All work is done on the raw (still-encoded) components, so escaped
     * delimiters such as {@code %2F} or {@code %26} keep their meaning and
     * nothing is encoded twice.
     *
     * @param uri a URI
     * @return the normalized URI
     * @throws URISyntaxException if the re-assembled URI cannot be parsed
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         retained in the signature for source compatibility with callers
     */
    public static URI normalizeURI(URI uri) throws URISyntaxException,
            UnsupportedEncodingException {
        if (uri.isOpaque()) {
            return uri;
        }

        // Work from an all-ASCII form so every component below is already
        // percent-encoded and can be copied through without re-quoting.
        URI ascii = new URI(uri.toASCIIString());

        String scheme = ascii.getScheme();
        if (scheme != null) {
            scheme = scheme.toLowerCase(Locale.ROOT);
        }

        String authority = normalizeAuthority(ascii, scheme);

        String path = normalizeEscapes(ascii.getRawPath());
        if (path == null) {
            path = "";
        }
        // An empty path is only equivalent to "/" when an authority precedes it;
        // rewriting it in a bare relative reference would change its meaning.
        if (path.length() == 0 && authority != null) {
            path = "/";
        }

        String query = normalizeQueryString(ascii.getRawQuery());
        String fragment = normalizeEscapes(ascii.getRawFragment());

        // Assemble the string form directly: the multi-argument URI constructors
        // always quote '%', which would double-encode the existing escapes.
        StringBuilder text = new StringBuilder();
        if (scheme != null) {
            text.append(scheme).append(':');
        }
        if (authority != null) {
            text.append("//").append(authority);
        }
        text.append(path);
        if (query != null) {
            text.append('?').append(query);
        }
        if (fragment != null) {
            text.append('#').append(fragment);
        }

        // Dot segments are removed last because decoding "%2E" above may have
        // produced new "." or ".." segments.
        return new URI(text.toString()).normalize();
    }

    /**
     * Builds the normalized authority component.
     *
     * @param uri a hierarchical URI
     * @param scheme the already lower-cased scheme, or {@code null} if absent
     * @return the normalized authority, or {@code null} if the URI has none
     */
    private static String normalizeAuthority(URI uri, String scheme) {
        String host = uri.getHost();
        if (host == null) {
            // Either there is no authority at all (null) or it is registry-based
            // and has no host/port structure to normalize.
            return normalizeEscapes(uri.getRawAuthority());
        }

        StringBuilder authority = new StringBuilder();
        String userInfo = uri.getRawUserInfo();
        if (userInfo != null) {
            authority.append(normalizeEscapes(userInfo)).append('@');
        }
        authority.append(host.toLowerCase(Locale.ROOT));

        int port = uri.getPort(); // -1 when the URI does not specify a port
        Integer defaultPort = getPortForScheme(scheme);
        boolean isDefault = defaultPort != null && defaultPort.intValue() == port;
        if (port != -1 && !isDefault) {
            authority.append(':').append(port);
        }
        return authority.toString();
    }

    /**
     * See http://www.iana.org/assignments/port-numbers.  This is a partial list of only the most common.
     *
     * @param scheme a scheme within a URI (such as http, ftp, ssh, etc); may be {@code null}
     * @return the standard port number for that scheme, or {@code null} if the
     *         scheme is {@code null} or not in the list
     */
    private static Integer getPortForScheme(String scheme) {
        if (scheme == null) {
            return null;
        }
        scheme = scheme.toLowerCase(Locale.ROOT);

        if ("http".equals(scheme))
            return 80;
        if ("https".equals(scheme))
            return 443;
        if ("ftp".equals(scheme))
            return 21;
        if ("ssh".equals(scheme))
            return 22;
        if ("telnet".equals(scheme))
            return 23;
        if ("gopher".equals(scheme))
            return 70;
        if ("http-alt".equals(scheme))
            return 8080;
        if ("radan-http".equals(scheme))
            return 8088;
        if ("dnsix".equals(scheme))
            return 90;
        if ("echo".equals(scheme))
            return 7;
        if ("daytime".equals(scheme))
            return 13;
        if ("smtp".equals(scheme))
            return 25;
        if ("time".equals(scheme))
            return 37;

        return null;
    }

    /**
     * Given a raw (still percent-encoded) URI query string, returns it with each
     * parameter name and value escape-normalized and the parameters sorted by
     * name, ignoring case. The sort is stable, so repeated parameters and names
     * that differ only in case keep their original relative order. A parameter
     * without {@code "="} is emitted as the bare name.
     *
     * @param queryString the raw query component, possibly {@code null} or empty
     * @return the normalized query, or the input itself when it is {@code null} or empty
     */
    private static String normalizeQueryString(String queryString) {
        if (queryString == null || queryString.length() == 0) {
            return queryString;
        }

        // Each entry is {name} or {name, value}; a list (not a map) so that
        // repeated parameter names are all preserved.
        List<String[]> params = new ArrayList<String[]>();
        for (String piece : queryString.split("&")) {
            String[] pair = piece.split("=", 2);
            for (int i = 0; i < pair.length; i++) {
                pair[i] = normalizeEscapes(pair[i]);
            }
            params.add(pair);
        }

        Collections.sort(params, new Comparator<String[]>() {
            public int compare(String[] left, String[] right) {
                return left[0].compareToIgnoreCase(right[0]);
            }
        });

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < params.size(); i++) {
            String[] pair = params.get(i);
            if (i > 0) {
                builder.append('&');
            }
            builder.append(pair[0]);
            if (pair.length == 2) {
                builder.append('=').append(pair[1]);
            }
        }
        return builder.toString();
    }

    /**
     * Normalizes the percent-escapes in a raw URI component: escapes of
     * unreserved characters are decoded, and all other escapes are kept with
     * upper-case hex digits. A {@code '%'} not followed by two hex digits is
     * copied through unchanged.
     *
     * @param raw a raw URI component, possibly {@code null}
     * @return the component with normalized escapes, or {@code null} if {@code raw} is {@code null}
     */
    private static String normalizeEscapes(String raw) {
        if (raw == null || raw.indexOf('%') < 0) {
            return raw;
        }

        int length = raw.length();
        StringBuilder out = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = raw.charAt(i);
            int hi = -1;
            int lo = -1;
            if (c == '%' && i + 2 < length) {
                hi = Character.digit(raw.charAt(i + 1), 16);
                lo = Character.digit(raw.charAt(i + 2), 16);
            }
            if (hi < 0 || lo < 0) {
                out.append(c);
                i++;
                continue;
            }

            int octet = (hi << 4) | lo;
            if (isUnreserved(octet)) {
                out.append((char) octet);
            } else {
                out.append('%').append(HEX_DIGITS[hi]).append(HEX_DIGITS[lo]);
            }
            i += 3;
        }
        return out.toString();
    }

    /**
     * Tells whether the given octet is an RFC 3986 "unreserved" character:
     * an ASCII letter or digit, or one of {@code - . _ ~}.
     */
    private static boolean isUnreserved(int octet) {
        return (octet >= 'a' && octet <= 'z')
                || (octet >= 'A' && octet <= 'Z')
                || (octet >= '0' && octet <= '9')
                || octet == '-' || octet == '.' || octet == '_' || octet == '~';
    }
}

Related

  1. normalizedSetCookiePath(final String path, final URI originUri)
  2. normalizedUri(URI uri)
  3. normalizeGitRepoLocation(URI location)
  4. normalizeLink(String link, URI parent, boolean removeParams)
  5. normalizeURI(final URI uri)
  6. normalizeURIPath(String uri)
  7. normalizeUriPath(String uriPath)