Normalizes an URL : URL « Network Protocol « Java






Normalizes an URL

    
/*
 * Copyright (c) JForum Team
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, 
 * with or without modification, are permitted provided 
 * that the following conditions are met:
 * 
 * 1) Redistributions of source code must retain the above 
 * copyright notice, this list of conditions and the 
 * following  disclaimer.
 * 2)  Redistributions in binary form must reproduce the 
 * above copyright notice, this list of conditions and 
 * the following disclaimer in the documentation and/or 
 * other materials provided with the distribution.
 * 3) Neither the name of "Rafael Steil" nor 
 * the names of its contributors may be used to endorse 
 * or promote products derived from this software without 
 * specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT 
 * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
 * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 * IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
 * 
 * Created on 11/07/2005 00:32:01
 * The JForum Project
 * http://www.jforum.net
 */

/**
 * Normalizes a URL fragment into a readable, lower-case "slug".
 * Letters and digits are kept, blank spaces and hyphens are replaced by
 * a single underscore, and every other character is dropped.
 * @author Rafael Steil
 * @version $Id: URLNormalizer.java,v 1.4 2006/08/20 22:47:42 rafaelsteil Exp $
 */
public class URLNormalizer
{
  /** Default limit used by {@link #normalize(String)}. */
  public static final int LIMIT = 30;
  
  /**
   * Normalizes an URL using the default settings.
   * The url is truncated at {@link #LIMIT} using friendly truncation.
   * @param url the url to normalize
   * @return the normalized url
   * @throws NullPointerException if <code>url</code> is <code>null</code>
   * @see #normalize(String, int, boolean)
   */
  public static String normalize(String url)
  {
    return normalize(url, LIMIT, true);
  }
  
  /**
   * Normalizes an URL.
   * <ul>
   * <li>Letters and digits are copied to the result.</li>
   * <li>Space chars (as defined by {@link Character#isSpaceChar(char)}) and
   * hyphens become an underscore, unless the result is still empty or
   * already ends with an underscore. The result therefore never starts with
   * an underscore nor contains two of them in a row.</li>
   * <li>Any other char is dropped.</li>
   * <li>The result is lower-cased independently of the default locale.</li>
   * </ul>
   * @param url the url to normalize
   * @param limit do not process more than <code>limit + 1</code> chars
   * (indexes <code>0</code> to <code>limit</code>, inclusive)
   * @param friendlyTruncate If <code>true</code>, will try to not cut a word if
   * more than <code>limit</code> chars were processed. It will stop in the next
   * char that is not a letter or digit. If <code>false</code>, processing
   * stops right after index <code>limit</code>
   * @return the normalized url
   * @throws NullPointerException if <code>url</code> is <code>null</code>
   */
  public static String normalize(String url, int limit, boolean friendlyTruncate)
  {
    char[] chars = url.toCharArray();
    
    // Local, single-threaded buffer: no need for StringBuffer's locking.
    StringBuilder sb = new StringBuilder(chars.length);
    
    for (int i = 0; i < chars.length; i++) {
      boolean pastLimit = i > limit;
      
      // Past the limit we only go on to finish a word that is in progress.
      // Nothing below can change the buffer once we get here, so stop.
      if (pastLimit && (!friendlyTruncate || endsWithUnderscore(sb))) {
        break;
      }
      
      char c = chars[i];
      
      if (Character.isSpaceChar(c) || c == '-') {
        if (pastLimit) {
          // The word that crossed the limit is complete.
          break;
        }
        
        // Test the buffer, not the index: the buffer may still be empty
        // at i > 0 when every previous char was dropped.
        if (sb.length() > 0 && !endsWithUnderscore(sb)) {
          sb.append('_');
        }
      }
      else if (Character.isLetterOrDigit(c)) {
        sb.append(c);
      }
      else if (pastLimit) {
        // A special char also ends the word that crossed the limit.
        break;
      }
    }
    
    // Locale.ROOT keeps the result stable across default locales
    // (e.g. Turkish, where "I" would otherwise become a dotless i).
    return sb.toString().toLowerCase(java.util.Locale.ROOT);
  }
  
  /**
   * Tells if the buffer is non-empty and its last char is an underscore.
   * @param sb the buffer to inspect
   * @return <code>true</code> if the last char of <code>sb</code> is '_'
   */
  private static boolean endsWithUnderscore(CharSequence sb)
  {
    int length = sb.length();
    return length > 0 && sb.charAt(length - 1) == '_';
  }
}
///////////////
/*
 * Created on 11/07/2005 00:25:19
 */
package net.jforum.util;

import junit.framework.TestCase;

/**
 * Test cases for {@link URLNormalizer}: replacement of blank spaces,
 * friendly and unfriendly truncation, and removal of special chars.
 * @author Rafael Steil
 * @version $Id: URLNormalizerTest.java,v 1.4 2005/07/26 04:01:12 diegopires Exp $
 */
public class URLNormalizerTest extends TestCase
{
  /** Blank spaces between words are replaced by underlines. */
  public void testReplaceSpaceByUnderline()
  {
    assertEquals("this_is_a_test", URLNormalizer.normalize("this is a test"));
  }
  
  /** Friendly truncation finishes the word that crosses the limit. */
  public void testFriendlyLimit()
  {
    String input = "this is long string used for testing the limit";
    
    assertEquals("this_is_long_string_used_for_testing", URLNormalizer.normalize(input));
  }
  
  /** Unfriendly truncation cuts right after the limit, even inside a word. */
  public void testUnfriendlyLimit()
  {
    String input = "this is long string used for testing the limit";
    String result = URLNormalizer.normalize(input, URLNormalizer.LIMIT, false);
    
    assertEquals("this_is_long_string_used_for_te", result);
  }
  
  /** A parenthesis after the limit ends the word, just like a blank space. */
  public void testFriendlyLimitWithParentesis()
  {
    String input = "this is long string used for testing(the limit)";
    
    assertEquals("this_is_long_string_used_for_testing", URLNormalizer.normalize(input));
  }
  
  /** Plus signs and parentheses are dropped without doubling underlines. */
  public void testRemovePlusParentesis()
  {
    String result = URLNormalizer.normalize("a test + some + 2 thing(s)");
    
    assertEquals("a_test_some_2_things", result);
  }
  
  /** A string made only of special chars normalizes to an empty string. */
  public void testRemovePorcentageDollarStarEtc()
  {
    assertEquals("", URLNormalizer.normalize("!@#$%^&*"));
  }
}

   
    
    
    
  








Related examples in the same category

1.Creating a URL with a single string.
2.Creating a URL With components
3.Converting Between a Filename Path and a URL
4.URL Constructor Test
5.URL Encode Test
6.Get URL Content
7.Get URL Parts
8.Read from a URL
9.Convert a URL to a URI
10.Converting Between a URL and a URI
11.Convert an absolute URI to a URL
12.URL Equality
13.Parsing a URL
14.URL Request
15.URL Get
16.A URL Retrieval Example
17.URL Reader
18.URL Connection Reader
19.Using URLConnection
20.Parse URL
21.Resolve a relative URL
22.Sends e-mail using a mailto: URL
23.Convert the absolute URI to a URL object
24.Convert URI to URL
25.Get parts of a url
26.Checks, whether the URL uses a file based protocol.
27.Add Parameter to URL
28.Returns the anchor value of the given URL
29.Extracts the file name from the URL.
30.Creates a relative url by stripping the common parts of the url.
31.Checks, whether the URL points to the same service. A service is equal if the protocol, host and port are equal.
32.Extracts the base URL from the given URL by stripping the query and anchor part.
33.Returns true if the URL represents a path, and false otherwise.
34.Parse Port
35.Parse Host
36.Given a URL check if its a jar url(jar:!/archive) and if it is, extract the archive entry into the given dest directory and return a file URL to its location
37.check the validity of url pattern according to the spec.
38.A collection of File, URL and filename utility methods
39.Build Relative URL Path
40.Checks that the protocol://host:port part of two URLs are equal
41.Create valid URL from a system id
42.Extract URL File Name
43.Extract the URL page name from the given path
44.Get Domain Name
45.Get Locale From String
46.Get URL Last Modified
47.Get the name of the parent of the given URL path
48.Get the parent of the given URL path
49.Has URLContent Changed
50.Is URL a local file
51.Normalize an URL
52.Resolve a relative URL string against an absolute URL string
53.ResourceBundle String manager
54.Save URL contents to a file
55.URL Path: standardize the creation of mutation of path-like structures
56.Utility class for building URLs
57.Add Default Port to a URL If Missing
58.Get Relative Path To URL
59.Download from a URL and save to a file