Java URL Normalize normalize(String url_str)

Here you can find the source of normalize(String url_str)

Description

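Normalizes a URL string: lower-cases and trims it, removes the fragment and any trailing slashes, drops a leading "www." from the host, and percent-encodes spaces, tabs and newlines.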

License

Open Source License

Declaration

public static String normalize(String url_str) throws MalformedURLException, UnsupportedEncodingException 

Method Source Code


//package com.java2s;
//License from project: Open Source License 

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Locale;

/**
 * Static helpers that rewrite a URL string into a canonical textual form so
 * that equivalent spellings of the same address compare equal as strings.
 */
public class Main {
    /** Scheme prefixes after which a leading {@code "www."} is removed. */
    private static final String[] SCHEME_PREFIXES = { "http://", "https://", "ftp://" };

    /**
     * Runs the full normalization pipeline: {@link #clean(String)}, then
     * {@link #removewww(String)}, then {@link #translateWhiteSpaces(String)}.
     *
     * @param url_str the raw URL text
     * @return the normalized URL text
     * @throws MalformedURLException if the URL is null, too short, does not start
     *         with {@code http} or {@code ftp}, or is rejected by {@link URL}
     * @throws UnsupportedEncodingException declared by the UTF-8 decoding step
     */
    public static String normalize(String url_str) throws MalformedURLException, UnsupportedEncodingException {
        String cleaned = clean(url_str);
        String withoutWww = removewww(cleaned);
        return translateWhiteSpaces(withoutWww);
    }

    /**
     * Lower-cases and trims the URL, strips the fragment, percent-decodes it,
     * validates it through {@link URL} and removes trailing slashes.
     *
     * @param url_str the raw URL text
     * @return the cleaned URL text (whitespace is not yet re-encoded)
     * @throws MalformedURLException if the URL is null, does not start with
     *         {@code http} or {@code ftp}, is shorter than the minimum length,
     *         or cannot be parsed by {@link URL}
     * @throws UnsupportedEncodingException declared by {@link URLDecoder#decode(String, String)}
     */
    public static String clean(String url_str) throws MalformedURLException, UnsupportedEncodingException {
        if (url_str == null) {
            throw new MalformedURLException("URL is null");
        }

        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. a Turkish default would map 'I' to a dotless 'ı').
        url_str = url_str.toLowerCase(Locale.ROOT).trim();

        if (!url_str.startsWith("http") && !url_str.startsWith("ftp")) {
            throw new MalformedURLException("URl does not start with http or ftp!: " + url_str);
        }

        if (url_str.length() < 5) {
            throw new MalformedURLException("Short URL: " + url_str);
        }

        // The fragment starts at the FIRST '#'; everything from there on is dropped.
        int fragmentStart = url_str.indexOf('#');
        if (fragmentStart >= 0) {
            url_str = url_str.substring(0, fragmentStart);
        }

        // NOTE(review): decoding happens after the fragment was removed, so an
        // encoded "%23" becomes a literal '#' again here — confirm this is intended.
        try {
            url_str = URLDecoder.decode(url_str, "UTF-8");
        } catch (IllegalArgumentException exp) {
            // Broken percent escapes are tolerated: the text is kept undecoded.
            // Any other IllegalArgumentException is passed on to the caller.
            String message = exp.getMessage();
            boolean brokenEscape = message != null
                    && (message.contains("Illegal hex characters in escape (%) pattern")
                            || message.contains("Incomplete trailing escape (%) pattern"));
            if (!brokenEscape) {
                throw exp;
            }
        }

        // Constructing a URL validates the syntax; it throws MalformedURLException on failure.
        url_str = new URL(url_str).toString();

        int end = url_str.length();
        while (end > 0 && url_str.charAt(end - 1) == '/') {
            end--;
        }
        url_str = url_str.substring(0, end);

        if (url_str.length() < 10) {
            throw new MalformedURLException("Short URL: " + url_str);
        }

        return url_str;
    }

    /**
     * Removes a {@code "www."} that immediately follows an {@code http://},
     * {@code https://} or {@code ftp://} prefix.
     *
     * @param url the URL text; matching is case-sensitive
     * @return the URL without the leading {@code "www."}, or the input unchanged
     *         when no supported prefix matches
     */
    public static String removewww(String url) {
        for (String scheme : SCHEME_PREFIXES) {
            String prefixWithWww = scheme + "www.";
            if (url.startsWith(prefixWithWww)) {
                return scheme + url.substring(prefixWithWww.length());
            }
        }
        return url;
    }

    /**
     * Percent-encodes space, tab and newline characters.
     *
     * @param url the URL text
     * @return the text with {@code ' '}, {@code '\t'} and {@code '\n'} replaced by
     *         {@code %20}, {@code %09} and {@code %0A}
     */
    public static String translateWhiteSpaces(String url) {
        // String.replace performs literal replacement; no regex is needed here.
        return url.replace(" ", "%20").replace("\t", "%09").replace("\n", "%0A");
    }
}

Related

  1. normalizeUrl(String url)
  2. normalizeUrlStr(String urlStr)
  3. normalize(String absoluteURL)
  4. normalize(String url)
  5. normalize(String url)
  6. normalize(URL u)
  7. normalize(URL url)
  8. normalize(URL url)
  9. normalizeCapabilitiesUrl(String url)