Java URL Normalize NormalizeURL(String URL)

Here you can find the source of NormalizeURL(String URL)

Description

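Normalizes an http, https, or ftp URL by dropping its user info, query string, and fragment and by using "/" when the path is empty. Any other input is returned trimmed but otherwise unchanged.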

License

Open Source License

Declaration

public static String NormalizeURL(String URL) 

Method Source Code

//package com.java2s;
/* From www.java2s.com
 * HTML-cleaner used in TREC 19,20,21 adhoc.
 *
 * Boytsov, L., Belova, A., 2011. Evaluating Learning-to-Rank Methods in the Web Track Adhoc Task. 
 * In TREC-20: Proceedings of the Twentieth Text REtrieval Conference.
 *
 * Author: Leonid Boytsov
 * Copyright (c) 2013
 *
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 */

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;

public class Main {
    /**
     * Reduces a URL to the canonical form {@code scheme://host[:port]/path}.
     *
     * <p>Only {@code http}, {@code https} and {@code ftp} URLs that have a host are
     * rewritten. For those, the user info, query string and fragment are dropped,
     * an empty path becomes {@code "/"}, and the scheme and host are lower-cased
     * (both are case-insensitive, so this makes equivalent URLs compare equal).
     * An explicit port is kept as written.
     *
     * <p>Every other input (unparseable text, relative references, other schemes
     * such as {@code mailto:}) is returned with surrounding whitespace removed
     * and is otherwise left untouched.
     *
     * @param URL the URL text to normalize; may be {@code null}
     * @return the normalized URL, the trimmed input when it cannot be normalized,
     *         or {@code null} when {@code URL} is {@code null}
     */
    public static String NormalizeURL(String URL) {
        if (URL == null) {
            return null;
        }

        // Trim before parsing: java.net.URI rejects leading/trailing whitespace,
        // which would otherwise send a perfectly good URL down the fallback path.
        final String trimmed = URL.trim();

        final URI parsed;
        try {
            parsed = new URI(trimmed);
        } catch (URISyntaxException e) {
            // Not a syntactically valid URI; hand the text back unchanged.
            return trimmed;
        }

        final String host = parsed.getHost();
        final String scheme = parsed.getScheme();
        if (host == null || scheme == null) {
            return trimmed;
        }

        // URI.getScheme() preserves the case the caller typed, so fold it first.
        final String lowerScheme = scheme.toLowerCase(Locale.ROOT);
        final boolean supported = lowerScheme.equals("http")
                || lowerScheme.equals("https")
                || lowerScheme.equals("ftp");
        if (!supported) {
            return trimmed;
        }

        String path = parsed.getPath();
        if (path == null || path.isEmpty()) {
            path = "/";
        }

        try {
            // getPath() returned the decoded path; this constructor quotes any
            // illegal characters again when it assembles the URI.
            final URI normalized = new URI(lowerScheme, null /* user info */,
                    host.toLowerCase(Locale.ROOT), parsed.getPort(), path,
                    null /* query */, null /* fragment */);
            return normalized.toString();
        } catch (URISyntaxException e) {
            return trimmed;
        }
    }
}

Related

  1. normalizeUrl(String baseUrl, List urlList)
  2. normalizeUrl(String baseUrl, String url)
  3. normalizeURL(String solrServerUrl)
  4. normalizeUrl(String url)
  5. normalizeUrl(String url)
  6. normalizeUrl(String url)
  7. normalizeUrl(String url)
  8. normalizeUrl(String url)
  9. normalizeUrl(String url)