Java URL Normalize NormalizeURL(final String taintedURL)

Here you can find the source of NormalizeURL(final String taintedURL)

Description

Normalization code courtesy of 'Mike Houston' http://stackoverflow.com/questions/2993649/how-to-normalize-a-url-in-java

License

Open Source License

Declaration

public static String NormalizeURL(final String taintedURL) throws MalformedURLException 

Method Source Code

//package com.java2s;
/*
 The MIT License (MIT)
    
 PokerFace: Asynchronous, streaming, HTTP/1.1, scriptable, reverse proxy.
    
 Copyright (c) 2015 Frank Stock
    
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
    
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
    
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */

import java.io.UnsupportedEncodingException;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;

import java.util.HashMap;
import java.util.Iterator;

import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class Main {
    /**
     * Produces a canonical string form of a URL so that equivalent URLs compare equal.
     * <p>
     * The URL is parsed and path-normalized through {@link URI#normalize()}, a single trailing
     * slash is removed from the path, the port is omitted when it is the default for the
     * protocol, and the query parameters are decoded, sorted by name, stripped of common
     * tracking parameters (names starting with {@code utm_} or containing {@code session})
     * and re-encoded per RFC 3986. Only protocol, host, port, path and query are emitted;
     * user-info and fragment are not part of the result.
     * <p>
     * Normalization code courtesy of 'Mike Houston' http://stackoverflow.com/questions/2993649/how-to-normalize-a-url-in-java
     *
     * @param taintedURL Absolute URL to normalize.
     * @return The normalized URL.
     * @throws MalformedURLException If the input is not a syntactically valid URI or uses an unknown protocol.
     * @throws IllegalArgumentException If the input is not an absolute URI (raised by {@link URI#toURL()}).
     */
    public static String NormalizeURL(final String taintedURL) throws MalformedURLException {
        final URL url;
        try {
            url = new URI(taintedURL).normalize().toURL();
        } catch (URISyntaxException e) {
            // MalformedURLException has no cause-taking constructor, so attach the cause explicitly.
            final MalformedURLException malformed = new MalformedURLException(e.getMessage());
            malformed.initCause(e);
            throw malformed;
        }

        // Drop one trailing slash so "/a/" and "/a" normalize to the same path.
        // (String.replace is literal, so a "/$" pattern must not be used here.)
        String path = url.getPath();
        if (path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }

        final SortedMap<String, String> params = CreateParameterMap(url.getQuery());
        if (params != null) {
            // Some params are only relevant for user tracking, so remove the most common ones.
            for (Iterator<String> i = params.keySet().iterator(); i.hasNext();) {
                final String key = i.next();
                if (key.startsWith("utm_") || key.contains("session")) {
                    i.remove();
                }
            }
        }
        // Only emit the '?' separator when at least one parameter survives the filtering.
        final String queryString = (params == null || params.isEmpty()) ? "" : "?" + Canonicalize(params);

        // A port is only significant when it differs from the protocol's own default
        // (80 for http, 443 for https, ...).
        final int port = url.getPort();
        final boolean explicitPort = port != -1 && port != url.getDefaultPort();

        return url.getProtocol() + "://" + url.getHost() + (explicitPort ? ":" + port : "") + path
                + queryString;
    }

    /**
     * Takes a query string, separates the constituent name-value pairs, and stores them in a SortedMap ordered by lexicographical order.
     * Names and values are URL-decoded. A pair without '=' is stored with an empty value, empty
     * pairs (as in "a=1&&b=2") are skipped, and when a name occurs more than once the last
     * occurrence wins.
     * 
     * @param queryString Raw (still percent-encoded) query string, without the leading '?'.
     * @return Null if there is no query string.
     */
    private static SortedMap<String, String> CreateParameterMap(final String queryString) {
        if (queryString == null || queryString.isEmpty()) {
            return null;
        }
        final SortedMap<String, String> params = new TreeMap<String, String>();
        for (final String pair : queryString.split("&")) {
            if (pair.isEmpty()) {
                continue;
            }
            // With a limit of 2 the split yields either [name] or [name, value];
            // "=v" yields ["", "v"], so an empty name needs no special case.
            final String[] tokens = pair.split("=", 2);
            final String name = DecodeComponent(tokens[0]);
            final String value = tokens.length > 1 ? DecodeComponent(tokens[1]) : "";
            params.put(name, value);
        }
        return params;
    }

    /**
     * URL-decodes a single query-string component as UTF-8.
     * 
     * @param component Encoded name or value.
     * @return Decoded text, or the input unchanged if UTF-8 were somehow unavailable.
     */
    private static String DecodeComponent(final String component) {
        try {
            return URLDecoder.decode(component, "UTF-8");
        } catch (UnsupportedEncodingException ignored) {
            // Every Java platform is required to support UTF-8, so this is not expected to
            // happen; fall back to the raw text rather than failing the whole normalization.
            return component;
        }
    }

    /**
     * Canonicalize the query string.
     * 
     * @param sortedParamMap Parameter name-value pairs in lexicographical order.
     * @return Canonical form of query string ("name=value" pairs joined by '&'), or an empty string for a null or empty map.
     */
    private static String Canonicalize(final SortedMap<String, String> sortedParamMap) {
        if (sortedParamMap == null || sortedParamMap.isEmpty()) {
            return "";
        }
        // Purely local buffer, so the unsynchronized StringBuilder is sufficient.
        final StringBuilder sb = new StringBuilder(350);
        for (final Map.Entry<String, String> pair : sortedParamMap.entrySet()) {
            if (sb.length() > 0) {
                sb.append('&');
            }
            sb.append(PercentEncodeRfc3986(pair.getKey()));
            sb.append('=');
            sb.append(PercentEncodeRfc3986(pair.getValue()));
        }
        return sb.toString();
    }

    /**
     * Percent-encode values according the RFC 3986. The built-in Java URLEncoder does not encode according to the RFC, so we make the extra replacements.
     * 
     * @param string Decoded string.
     * @return Encoded string per RFC 3986.
     */
    public static String PercentEncodeRfc3986(final String string) {
        try {
            // URLEncoder emits '+' for space, leaves '*' alone and escapes '~'; adjust all three.
            return URLEncoder.encode(string, "UTF-8").replace("+", "%20").replace("*", "%2A").replace("%7E", "~");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is always available; kept as a best-effort fallback for callers.
            return string;
        }
    }
}

Related

  1. normalizeCapabilitiesUrl(String url)
  2. normalizePrefix(String url)
  3. normalizeShortUrl(String url)
  4. normalizeToLUrl(String toLUrl)
  5. normalizeToURL(String surl)
  6. normalizeUrl(final String url)
  7. normalizeUrl(String baseUrl, List urlList)
  8. normalizeUrl(String baseUrl, String url)
  9. normalizeURL(String solrServerUrl)