URI utilities : URI « Network Protocol « Java

URI utilities
     
/* Copyright 2004-2006 Elliotte Rusty Harold
   
   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public 
   License as published by the Free Software Foundation.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
   GNU Lesser General Public License for more details.
   
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the 
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
   Boston, MA 02111-1307  USA
   
   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/


import java.io.UnsupportedEncodingException;


/**
 * Package-private helpers for resolving, relativizing, normalizing and
 * percent-escaping URI references, following the algorithms of RFC 3986.
 * <p>
 * These methods are not fully general. They perform very little
 * validation: arguments are expected to be non-null strings that have
 * already been checked to be plausible URI references. Certain
 * preconditions for these methods to operate correctly are true in the
 * context of XOM, but may well not be true in a more general context.
 *
 * @author Elliotte Rusty Harold
 * @version 1.2d1
 *
 */
class URIUtil {

    /** ASCII punctuation that {@link #toURI(String)} copies through unescaped. */
    private static final String UNESCAPED_PUNCTUATION = "!#$%&'()*+,-./:;=?@[]_~";

    /** Upper-case hexadecimal digits used when writing percent escapes. */
    private static final String HEX_DIGITS = "0123456789ABCDEF";


    /**
     * Reports whether a URI reference begins with a scheme.
     *
     * @param uri the URI reference to test; must not be null
     * @return true if <code>uri</code> starts with a syntactically valid
     *     scheme (a letter followed by letters, digits, '+', '-' or '.')
     *     terminated by a colon; false otherwise
     */
    static boolean isAbsolute(String uri) {
        return schemeEnd(uri) != -1;
    }


    /**
     * Locates the colon that terminates the scheme of a URI reference.
     * A colon that appears later in the reference (in a path segment,
     * port, query or fragment) is not a scheme delimiter, so the text
     * before the first colon must match the RFC 3986 scheme syntax.
     *
     * @return the index of the scheme-terminating colon, or -1 if the
     *     reference has no scheme
     */
    private static int schemeEnd(String uri) {

        int colon = uri.indexOf(':');
        if (colon < 1 || !isAsciiLetter(uri.charAt(0))) return -1;
        for (int i = 1; i < colon; i++) {
            char c = uri.charAt(i);
            boolean legal = isAsciiLetter(c) || (c >= '0' && c <= '9')
              || c == '+' || c == '-' || c == '.';
            if (!legal) return -1;
        }
        return colon;

    }


    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }


    /**
     * Resolves a URI reference against a base URI using the
     * transformation of RFC 3986, section 5.2.2. The fragment of the
     * base URI is never carried over.
     * <p>
     * This doesn't do enough error checking to be a public API.
     *
     * @param baseURI the base URI; if null or empty, <code>spec</code>
     *     is returned unchanged
     * @param spec the reference to resolve; must not be null
     * @return the resolved URI as a string
     */
    static String absolutize(String baseURI, String spec) {

        if (baseURI == null || "".equals(baseURI)) return spec;

        ParsedURI base = new ParsedURI(baseURI);

        // This seems to be necessary to handle base URLs like
        // http://www.example.com/test/data/..
        // although it is not part of the RFC 3986 algorithm.
        if (base.path.endsWith("/..")) base.path += '/';

        ParsedURI ref = new ParsedURI(spec);
        ParsedURI target = new ParsedURI();

        if (ref.scheme != null) {
            // The reference is already absolute: only normalize its path.
            target.scheme    = ref.scheme;
            target.authority = ref.authority;
            target.path      = removeDotSegments(ref.path);
            target.query     = ref.query;
        }
        else if (ref.authority != null) {
            // Network-path reference: only the scheme comes from the base.
            target.scheme    = base.scheme;
            target.authority = ref.authority;
            target.path      = removeDotSegments(ref.path);
            target.query     = ref.query;
        }
        else {
            target.scheme    = base.scheme;
            target.authority = base.authority;
            if ("".equals(ref.path)) {
                // Same-document or query-only reference.
                target.path = base.path;
                target.query = (ref.query != null) ? ref.query : base.query;
            }
            else {
                if (ref.path.startsWith("/")) {
                    target.path = removeDotSegments(ref.path);
                }
                else {
                    target.path = removeDotSegments(merge(base, ref.path));
                }
                target.query = ref.query;
            }
        }
        // Fragment ID of base URI is never considered
        target.fragment = ref.fragment;

        return target.toString();

    }


    /**
     * Combines a relative path with the directory part of the base path
     * (RFC 3986, section 5.2.3). Dot segments are not removed here.
     */
    private static String merge(ParsedURI base, String relativePath) {

        // A base with a non-empty authority and no path at all
        // resolves relative paths against the root.
        boolean bareAuthority = base.authority != null
          && !"".equals(base.authority)
          && "".equals(base.path);
        if (bareAuthority) return "/" + relativePath;

        int lastSlash = base.path.lastIndexOf('/');
        if (lastSlash == -1) return relativePath;
        return base.path.substring(0, lastSlash + 1) + relativePath;

    }


    /**
     * Removes "." and ".." segments from a path that begins with a slash
     * (RFC 3986, section 5.2.4).
     * <p>
     * The RFC's cases for paths that begin with "./" or "../", or that
     * consist solely of "." or "..", are not implemented; such leading
     * segments are copied to the result unchanged.
     *
     * @param path the path to normalize; must not be null
     * @return the path with complete dot segments removed
     */
    static String removeDotSegments(String path) {

        StringBuffer output = new StringBuffer();

        while (path.length() > 0) {
            if (path.startsWith("/./")) {
                path = '/' + path.substring(3);
            }
            else if (path.equals("/.")) {
                path = "/";
            }
            else if (path.startsWith("/../")) {
                path = '/' + path.substring(4);
                dropLastSegment(output);
            }
            else if (path.equals("/..")) {
                path = "/";
                dropLastSegment(output);
            }
            else {
                // Move the first segment, including its leading slash
                // if any, from the input to the output.
                int nextSlash = path.indexOf('/');
                if (nextSlash == 0) nextSlash = path.indexOf('/', 1);
                if (nextSlash == -1) {
                    output.append(path);
                    path = "";
                }
                else {
                    output.append(path.substring(0, nextSlash));
                    path = path.substring(nextSlash);
                }
            }
        }

        return output.toString();

    }


    /**
     * Truncates the buffer at its last slash, discarding the final
     * segment. A buffer that contains no slash is left unchanged.
     */
    private static void dropLastSegment(StringBuffer output) {
        int lastSlash = output.toString().lastIndexOf('/');
        if (lastSlash != -1) output.setLength(lastSlash);
    }


    /**
     * The components of a URI reference. Really just a struct.
     * A component that is absent from the reference is null, except
     * for the path which is then the empty string.
     */
    static class ParsedURI {

        String scheme;
        String schemeSpecificPart;
        String query;
        String fragment;
        String authority;
        String path = "";

        /**
         * Splits a URI reference into its components.
         *
         * @param spec the URI reference to parse; must not be null
         */
        ParsedURI(String spec) {

            // Only a colon that follows a syntactically valid scheme
            // counts; colons in ports, paths, queries and fragments
            // must not be mistaken for the scheme delimiter.
            int colon = schemeEnd(spec);

            // URIs can only contain one sharp sign
            int sharp = spec.lastIndexOf('#');

            // Fragment IDs can contain question marks so we only read
            // the question mark before the fragment ID, if any
            int question;
            if (sharp == -1) question = spec.indexOf('?');
            else question = spec.substring(0, sharp).indexOf('?');

            if (colon != -1) scheme = spec.substring(0, colon);

            // The scheme specific part runs up to the query if there
            // is one, otherwise up to the fragment, otherwise to the end.
            int partEnd = spec.length();
            if (question != -1) partEnd = question;
            else if (sharp != -1) partEnd = sharp;
            schemeSpecificPart = spec.substring(colon + 1, partEnd);

            if (sharp != -1) {
                fragment = spec.substring(sharp + 1);
            }

            if (question != -1) {
                int queryEnd = (sharp == -1) ? spec.length() : sharp;
                query = spec.substring(question + 1, queryEnd);
            }

            if (schemeSpecificPart.startsWith("//")) {
                int authorityBegin = 2;
                int authorityEnd = schemeSpecificPart.indexOf('/', authorityBegin);
                if (authorityEnd == -1) {
                    authority = schemeSpecificPart.substring(authorityBegin);
                    path = "";
                }
                else {
                    authority = schemeSpecificPart.substring(authorityBegin, authorityEnd);
                    path = schemeSpecificPart.substring(authorityEnd);
                }
            }
            else {
                path = schemeSpecificPart;
            }

        }

        /** Creates an empty instance whose fields are filled in by the caller. */
        ParsedURI() {}

        /**
         * Reassembles the components into a URI reference
         * (RFC 3986, section 5.3). If the scheme specific part is set it
         * is written verbatim; otherwise it is rebuilt from the
         * authority and path, and "//" is written only when an
         * authority is defined.
         */
        public String toString() {

            StringBuffer result = new StringBuffer(30);

            if (scheme != null) {
                result.append(scheme);
                result.append(':');
            }

            if (schemeSpecificPart != null) {
                result.append(schemeSpecificPart);
            }
            else {
                if (authority != null) {
                    result.append("//");
                    result.append(authority);
                }
                result.append(path);
            }

            if (query != null) {
                result.append('?');
                result.append(query);
            }

            if (fragment != null) {
                result.append('#');
                result.append(fragment);
            }

            return result.toString();

        }

    }


    /**
     * Converts an IRI to a URI by percent-escaping, as UTF-8, every
     * character that is not an ASCII letter, an ASCII digit, or one of
     * <code>! # $ % &amp; ' ( ) * + , - . / : ; = ? @ [ ] _ ~</code>.
     * Existing percent signs are copied through, so input that is
     * already escaped is not escaped a second time.
     * <p>
     * A surrogate pair is escaped as the single four-byte UTF-8
     * sequence of the character it represents. An unpaired surrogate
     * has no UTF-8 form and is escaped as <code>%3F</code>.
     *
     * @param iri the IRI to convert; must not be null
     * @return the equivalent URI
     */
    static String toURI(String iri) {

        int length = iri.length();
        StringBuffer uri = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = iri.charAt(i);
            if (isUnescaped(c)) {
                uri.append(c);
            }
            else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < length
              && iri.charAt(i + 1) >= 0xDC00 && iri.charAt(i + 1) <= 0xDFFF) {
                // Both halves of the pair must be encoded together.
                appendEscapedUTF8(uri, iri.substring(i, i + 2));
                i++;
            }
            else {
                uri.append(percentEscape(c));
            }
        }
        return uri.toString();

    }


    /** Reports whether toURI copies the character through unchanged. */
    private static boolean isUnescaped(char c) {
        if (isAsciiLetter(c)) return true;
        if (c >= '0' && c <= '9') return true;
        return UNESCAPED_PUNCTUATION.indexOf(c) != -1;
    }


    /**
     * Percent-escapes the UTF-8 encoding of a single UTF-16 code unit.
     *
     * @param c the character to escape
     * @return one <code>%XX</code> triplet, with upper-case hexadecimal
     *     digits, for each byte of the character's UTF-8 encoding
     */
    static String percentEscape(char c) {

        StringBuffer result = new StringBuffer(9);
        appendEscapedUTF8(result, String.valueOf(c));
        return result.toString();

    }


    /** Appends a %XX triplet for each byte of the UTF-8 encoding of text. */
    private static void appendEscapedUTF8(StringBuffer out, String text) {

        byte[] data;
        try {
            data = text.getBytes("UTF8");
        }
        catch (UnsupportedEncodingException ex) {
            throw new RuntimeException(
              "Broken VM: does not recognize UTF-8 encoding", ex);
        }
        for (int i = 0; i < data.length; i++) {
            // Mask to an unsigned value; bytes above 0x7F are negative.
            int value = data[i] & 0xFF;
            out.append('%');
            out.append(HEX_DIGITS.charAt(value >> 4));
            out.append(HEX_DIGITS.charAt(value & 0x0F));
        }

    }


    /**
     * Expresses the path of one URI relative to a base URI.
     * <p>
     * If the two URIs differ in scheme or authority, <code>abs</code> is
     * returned unchanged. Otherwise the result is a relative path that
     * climbs with "../" from the directory of the base path as far as
     * needed. If the base path is empty or a single character, or no
     * common directory is found, the path of <code>abs</code> is
     * returned as is. In all of these path-only results the query and
     * fragment of <code>abs</code> are not included.
     *
     * @param base the URI to relativize against; must not be null
     * @param abs the URI to relativize; must not be null
     * @return a reference to <code>abs</code> as described above
     */
    static String relativize(String base, String abs) {

        ParsedURI parsedBase = new ParsedURI(base);
        ParsedURI parsedAbs  = new ParsedURI(abs);

        parsedBase.path = removeDotSegments(parsedBase.path);

        if (!sameComponent(parsedBase.scheme, parsedAbs.scheme)
          || !sameComponent(parsedBase.authority, parsedAbs.authority)) {
            return abs;
        }

        String basePath = parsedBase.path;
        String relPath = parsedAbs.path;
        StringBuffer climb = new StringBuffer();

        while (basePath.length() > 1) {
            int lastSlash = basePath.lastIndexOf('/');
            if (lastSlash == -1) break;
            // basePath is now a directory, without its trailing slash.
            basePath = basePath.substring(0, lastSlash);
            int dirLength = basePath.length();
            // Match whole segments only: /a/bc is not inside /a/b
            boolean inside = relPath.length() > dirLength
              && relPath.charAt(dirLength) == '/'
              && relPath.startsWith(basePath);
            if (inside) {
                String relative = climb.toString()
                  + relPath.substring(dirLength + 1);
                // An empty reference would mean the base document
                // itself rather than its directory.
                if (relative.length() == 0) return "./";
                return relative;
            }
            climb.append("../");
        }

        return relPath;

    }


    /** Null-safe equality for optional URI components. */
    private static boolean sameComponent(String a, String b) {
        if (a == null) return b == null;
        return a.equals(b);
    }


}
Related examples in the same category

1.	Encode a URI path
2.	Add two URI path segments
3.	Remove parameters from a uri.
4.	Remove any authorisation details from a URI
5.	A class to represent a Uniform Resource Identifier (URI).
6.	Utility to convert File to URL.
7.	Return True if the uri has a scheme
8.	Normalize a uri containing ../ and ./ paths.
9.	URI Support
10.	Get Parent URI
11.	Create and resolve URI
12.	Add parameters stored in the Map to the uri string.
13.	Make a URL from the given string
14.	Is Equal URI
15.	Resolves the specified URI, and returns the file represented by the URI.
16.	Returns the filename for the specified URI.