Example usage for org.apache.commons.httpclient URIException URIException

List of usage examples for org.apache.commons.httpclient URIException URIException

Introduction

On this page you can find example usages of the org.apache.commons.httpclient.URIException constructor URIException(String reason).

Prototype

public URIException(String reason) 

Source Link

Document

The constructor with a reason string argument.

Usage

From source file:com.limegroup.gnutella.licenses.LicenseFactory.java

/**
 * Extracts a Creative Commons license URI from the given license string.
 *
 * The URI is taken to be everything that follows the first occurrence of
 * {@code CCConstants.URL_INDICATOR}, with surrounding whitespace trimmed.
 * It is accepted only if it parses, its scheme is HTTP (case-insensitive)
 * and its authority is non-empty and contains no space character.
 *
 * @param license the license string to search
 * @return the parsed URI, or {@code null} if the indicator is missing,
 *         nothing follows it, or the URI is rejected (rejections are logged)
 */
private static URI getCCLicenseURI(String license) {
    // Locate the marker that precedes the URL.
    int indicatorPos = license.indexOf(CCConstants.URL_INDICATOR);
    if (indicatorPos == -1) {
        return null;
    }

    // Nothing after the marker means there is no URL to parse.
    int begin = indicatorPos + CCConstants.URL_INDICATOR.length();
    if (begin >= license.length()) {
        return null;
    }

    String candidate = license.substring(begin).trim();
    try {
        URI parsed = new URI(candidate.toCharArray());

        // Only HTTP URIs are accepted.
        String scheme = parsed.getScheme();
        boolean isHttp = scheme != null && scheme.equalsIgnoreCase("http");
        if (!isHttp) {
            throw new URIException("Invalid scheme: " + scheme);
        }

        // The authority must be present and free of spaces.
        String authority = parsed.getAuthority();
        boolean authorityOk = authority != null && !authority.equals("") && authority.indexOf(' ') == -1;
        if (!authorityOk) {
            throw new URIException("Invalid authority: " + authority);
        }

        return parsed;
    } catch (URIException e) {
        LOG.error("Unable to create URI", e);
        return null;
    }
}

From source file:de.kapsi.net.daap.DaapRequest.java

/**
 * Sets and parses the URI. Note: if URIException is
 * thrown then is this Request in an inconsistent state!
 *
 * @param uri/*from www.j  ava  2s .  co  m*/
 * @throws URIException
 */
private void setURI(URI uri) throws URIException {

    this.uri = uri;

    if (uri != null) {

        String path = uri.getPath();

        this.queryMap = DaapUtil.parseQuery(uri.getQuery());

        if (path.equals("/server-info")) {
            requestType = SERVER_INFO;
        } else if (path.equals("/content-codes")) {
            requestType = CONTENT_CODES;
        } else if (path.equals("/login")) {
            requestType = LOGIN;
        } else if (path.equals("/logout")) {
            requestType = LOGOUT;
        } else if (path.equals("/update")) {
            requestType = UPDATE;
        } else if (path.equals("/resolve")) {
            requestType = RESOLVE;
        }

        if (queryMap.containsKey("session-id")) {
            sessionId = Integer.parseInt((String) queryMap.get("session-id"));
        }

        if (sessionId != DaapUtil.NULL) {

            if (queryMap.containsKey("revision-number")) {
                revisionNumber = Integer.parseInt((String) queryMap.get("revision-number"));
            }

            if (queryMap.containsKey("delta")) {
                delta = Integer.parseInt((String) queryMap.get("delta"));
            }

            if (queryMap.containsKey("meta")) {
                metaString = (String) queryMap.get("meta");
            }

            isUpdateType = (delta != DaapUtil.NULL) && (delta < revisionNumber);

            // "/databases/id/items"                3 tokens
            // "/databases/id/containers"           3 tokens
            // "/databases/id/items/id.format"      4 tokens
            // "/databases/id/containers/id/items"  5 tokens
            if (path.equals("/databases")) {
                requestType = DATABASES;

            } else if (path.startsWith("/databases")) {

                StringTokenizer tok = new StringTokenizer(path, "/");
                int count = tok.countTokens();

                if (count >= 3) {
                    String token = tok.nextToken();

                    if (token.equals("databases") == false) {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@1");
                    }

                    databaseId = Integer.parseInt((String) tok.nextToken());
                    token = tok.nextToken();

                    if (token.equals("items")) {
                        requestType = DATABASE_SONGS;
                    } else if (token.equals("containers")) {
                        requestType = DATABASE_PLAYLISTS;
                    } else {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@2");
                    }

                    if (count == 3) {
                        // do nothing...

                    } else if (count == 4) {

                        token = (String) tok.nextToken();

                        StringTokenizer fileTokenizer = new StringTokenizer(token, ".");

                        if (fileTokenizer.countTokens() == 2) {
                            itemId = Integer.parseInt(fileTokenizer.nextToken());
                            requestType = SONG;

                        } else {
                            throw new URIException("Unknown token in path: " + path + " [" + token + "]@3");
                        }

                    } else if (count == 5) {
                        containerId = Integer.parseInt((String) tok.nextToken());
                        token = (String) tok.nextToken();

                        if (token.equals("items")) {
                            requestType = PLAYLIST_SONGS;

                        } else {
                            throw new URIException("Unknown token in path: " + path + " [" + token + "@4");
                        }

                    } else {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@5");
                    }
                } else {
                    throw new URIException("Unknown token in path: " + path);
                }
            }
        }

    } else {

        queryMap = null;
        metaString = null;
        isUpdateType = false;

        requestType = DaapUtil.NULL;
        databaseId = DaapUtil.NULL;
        containerId = DaapUtil.NULL;
        itemId = DaapUtil.NULL;

        sessionId = DaapUtil.NULL;
        revisionNumber = DaapUtil.NULL;
        delta = DaapUtil.NULL;
    }
}

From source file:com.cyberway.issue.net.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR 
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 * 
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The input is trimmed and, if both its first and last characters
 * validate against <code>delims</code>, that surrounding pair is removed
 * before the scheme, authority, path, query and fragment are parsed in
 * that order.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = <undefined>
 *  query     =  $7 = <undefined>
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException if <code>original</code> is null, or if
 * <code>escaped</code> is true and the query fails validation; helpers
 * called from here may also throw it
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Both the first and the last character must validate against
     * delims; otherwise nothing is removed.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when there is no colon after index 0 and the string does not
     * begin with "//", or when a slash occurs before the first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if ((atColon <= 0 && !tmp.startsWith("//")) || (atSlash >= 0 && atSlash < atColon)) {
        isStartedFromPath = true;
    }

    /*
     * Find the first component delimiter; ':' only counts when the
     * reference is not path-started. No delimiter at all yields 0.
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        // the candidate scheme is lower-cased before validation
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // continue parsing just past the ':'
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with 
            // later colon in other syntactical component
        }

    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        // "//" must be followed by at least one more character (at + 2 < length)
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no further delimiter: the authority runs to the end of the string
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        // the path extends to the first '?' or '#', or to the end of the string
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // unescaped input is checked with prevalidate against the
            // disallowed sets; escaped input with validate against the
            // allowed sets
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * Only entered when at least one character follows the '?'.
     * <p><blockquote><pre>
     *  query     =  $7 = <undefined>
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            // already-escaped input is stored as-is and must validate
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:com.hipu.bdb.util.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR 
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 * 
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The input is trimmed and, if both its first and last characters
 * validate against <code>delims</code>, that surrounding pair is removed
 * before the scheme, authority, path, query and fragment are parsed in
 * that order.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = <undefined>
 *  query     =  $7 = <undefined>
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException if <code>original</code> is null, or if
 * <code>escaped</code> is true and the query fails validation; helpers
 * called from here may also throw it
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Both the first and the last character must validate against
     * delims; otherwise nothing is removed.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when there is no colon after index 0 and the string does not
     * begin with "//", or when a slash occurs before the first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if ((atColon <= 0 && !tmp.startsWith("//")) || (atSlash >= 0 && atSlash < atColon)) {
        isStartedFromPath = true;
    }

    /*
     * Find the first component delimiter; ':' only counts when the
     * reference is not path-started. No delimiter at all yields 0.
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        // the candidate scheme is lower-cased before validation
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // continue parsing just past the ':'
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with 
            // later colon in other syntactical component
        }
    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        // "//" must be followed by at least one more character (at + 2 < length)
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no further delimiter: the authority runs to the end of the string
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        // the path extends to the first '?' or '#', or to the end of the string
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // unescaped input is checked with prevalidate against the
            // disallowed sets; escaped input with validate against the
            // allowed sets
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * Only entered when at least one character follows the '?'.
     * <p><blockquote><pre>
     *  query     =  $7 = <undefined>
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            // already-escaped input is stored as-is and must validate
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Validates a freshly created UURI.
 *
 * Currently only the length of the raw (escaped) form is checked: a
 * string can be shorter than MAX_URL_LENGTH before escaping yet longer
 * afterwards, and letting such an oversized URI through caused trouble
 * further down the processing chain.
 *
 * @param uuri Created uuri to check.
 * @return The passed <code>uuri</code>, so the check can be used inline.
 * @throws URIException if the raw URI is longer than
 * <code>UURI.MAX_URL_LENGTH</code>
 */
protected UURI validityCheck(UURI uuri) throws URIException {
    final int escapedLength = uuri.getRawURI().length;
    if (escapedLength <= UURI.MAX_URL_LENGTH) {
        return uuri;
    }
    throw new URIException("Created (escaped) uuri > " + UURI.MAX_URL_LENGTH + ": " + uuri.toString());
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  It does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the 
 * vanilla HttpClient URI class or official specs might suggest). 
 *
 * @param uri URI as string.
 * @param base Base URI used to decide whether a relative URI is
 * acceptable. May be null.
 * @param charset Charset handed to the minimal-escaping step applied to
 * the path and query.
 * @return A fixed up URI string.
 * @throws NullPointerException if <code>uri</code> is null.
 * @throws URIException if the URI is empty with no base, too long,
 * unparseable, uses an unsupported or ignored scheme, or is relative
 * with no base.
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any trailing spaces or new-lines. 
    uri = uri.trim();

    // IE actually converts backslashes to slashes rather than to %5C.
    // Since URIs that have backslashes usually work only with IE, we will
    // convert backslashes to slashes as well.
    // TODO: Maybe we can first convert backslashes by specs and than by IE
    // so that we fetch both versions.
    if (uri.indexOf(BACKSLASH) >= 0) {
        uri = TextUtils.replaceAll(BACKSLASH_PATTERN, uri, SLASH);
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);
    if (matcher.matches()) {
        uri = matcher.group(1) + matcher.group(2);
    }

    // now, minimally escape any whitespace
    uri = escapeWhitespace(uri);

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    matcher = RFC2396REGEX.matcher(uri);
    if (!matcher.matches()) {
        throw new URIException("Failed parse of " + uri);
    }
    String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
    String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
    String uriAuthority = checkUriElement(matcher.group(5));
    String uriPath = checkUriElement(matcher.group(6));
    String uriQuery = checkUriElement(matcher.group(8));
    // UNUSED String uriFragment = checkUriElement(matcher.group(10));

    // If a scheme, is it a supported scheme?
    if (uriScheme != null && uriScheme.length() > 0 && this.schemes != null) {
        if (Arrays.binarySearch(schemes, uriScheme) < 0) {
            // unsupported; see if silently ignored.  The ignored list is
            // null-checked like the supported list so that a missing
            // list yields "Unsupported scheme" instead of an NPE.
            if (ignoredSchemes != null && Arrays.binarySearch(ignoredSchemes, uriScheme) >= 0) {
                throw new URIException(IGNORED_SCHEME, "Ignored scheme: " + uriScheme);
            } else {
                throw new URIException("Unsupported scheme: " + uriScheme);
            }
        }
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain; 
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec 
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times. 
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO: Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:dk.netarkivet.wayback.batch.copycode.NetarchiveSuiteUURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  Its does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the
 * vanilla HttpClient URI class or official specs might suggest).
 *
 * @param uri URI as string.
 * @param base May be null.
 * @param e True if the uri is already escaped.
 * @return A fixed up URI string.
 * @throws URIException
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    // Overview: validate the input, normalize whitespace/backslashes/slashes,
    // split the string with RFC2396REGEX, vet the scheme, tidy authority and
    // path, minimally escape path and query, then reassemble the pieces.
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        // An empty string is only tolerated when there is a base.
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions
        // (the later check is not performed inside this method).
        throw new URIException("URI length > " + UURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any leading or trailing spaces or new-lines.
    uri = uri.trim();

    // IE actually converts backslashes to slashes rather than to %5C.
    // Since URIs that have backslashes usually work only with IE, we will
    // convert backslashes to slashes as well.
    // TODO Maybe we can first convert backslashes by specs and then by IE
    // so that we fetch both versions.
    if (uri.indexOf(BACKSLASH) >= 0) {
        uri = TextUtils.replaceAll(BACKSLASH_PATTERN, uri, SLASH);
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);
    if (matcher.matches()) {
        // Rebuild from the two captured groups, dropping whatever the
        // pattern matched between them.
        uri = matcher.group(1) + matcher.group(2);
    }

    // now, minimally escape any whitespace
    uri = escapeWhitespace(uri);

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    matcher = RFC2396REGEX.matcher(uri);
    if (!matcher.matches()) {
        throw new URIException("Failed parse of " + uri);
    }
    String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
    String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
    String uriAuthority = checkUriElement(matcher.group(5));
    String uriPath = checkUriElement(matcher.group(6));
    String uriQuery = checkUriElement(matcher.group(8));
    // The fragment (group 10) is deliberately not captured, so it never
    // reaches the rebuilt string returned below.
    // UNUSED String uriFragment = checkUriElement(matcher.group(10));

    // If a scheme, is it a supported scheme?  The check is skipped
    // entirely when no list of supported schemes is configured.
    // NOTE(review): Arrays.binarySearch needs sorted arrays; schemes and
    // ignoredSchemes are presumably sorted where they are built -- confirm.
    if (uriScheme != null && uriScheme.length() > 0 && this.schemes != null) {
        if (!(Arrays.binarySearch(schemes, uriScheme) >= 0)) {
            // unsupported; see if silently ignored
            if ((Arrays.binarySearch(ignoredSchemes, uriScheme) >= 0)) {
                throw new URIException(IGNORED_SCHEME, "Ignored scheme: " + uriScheme);
            } else {
                throw new URIException("Unsupported scheme: " + uriScheme);
            }
        }
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain;
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        // For http and https: validate the port, then strip the
        // HTTP_PORT / HTTPS_PORT tail (presumably the scheme's default
        // port suffix -- confirm against the constant definitions).
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times.
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    // Reassemble scheme, authority, path and query in that order; how the
    // separator and the boolean flag are applied is defined by
    // appendNonNull, which is not shown here.
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:davmail.exchange.ExchangeSession.java

/**
 * Resolves {@code path} against the URI of {@code method}.
 *
 * <ul>
 * <li>{@code null} path: the method's own URI is returned unchanged.</li>
 * <li>path starting with {@code /}: replaces the path of the method URI.</li>
 * <li>path starting with {@code http://} or {@code https://}: returned as is.</li>
 * <li>anything else: appended to the directory part of the method's
 *     current path.</li>
 * </ul>
 * For any non-null path the query string of the working URI is cleared first.
 *
 * @param method request whose URI and path serve as the base
 * @param path absolute path, absolute http(s) URL, relative path, or null
 * @return the resulting URI as a string
 * @throws URIException on URI manipulation failure, or when a relative
 *         path is given and the method's current path contains no '/'
 */
protected String getAbsoluteUri(HttpMethod method, String path) throws URIException {
    final URI target = method.getURI();
    if (path == null) {
        return target.getURI();
    }

    // reset query string
    target.setQuery(null);

    if (path.startsWith("/")) {
        // path is absolute, replace method path
        target.setPath(path);
        return target.getURI();
    }

    if (path.startsWith("http://") || path.startsWith("https://")) {
        return path;
    }

    // relative path, build new path from the directory of the current one
    final String basePath = method.getPath();
    final int lastSlash = basePath.lastIndexOf('/');
    if (lastSlash < 0) {
        throw new URIException(target.getURI());
    }
    target.setPath(basePath.substring(0, lastSlash + 1) + path);
    return target.getURI();
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * If http(s) scheme, check scheme specific part begins '//'.
 * @throws URIException /*from  w  ww .ja v a 2s .co m*/
 * @see http://www.faqs.org/rfcs/rfc1738.html Section 3.1. Common Internet
 * Scheme Syntax
 */
protected void checkHttpSchemeSpecificPartSlashPrefix(final URI base, final String scheme,
        final String schemeSpecificPart) throws URIException {
    if (scheme == null || scheme.length() <= 0) {
        return;
    }
    if (!scheme.equals("http") && !scheme.equals("https")) {
        return;
    }
    if (schemeSpecificPart == null || !schemeSpecificPart.startsWith("//")) {
        // only acceptable if schemes match
        if (base == null || !scheme.equals(base.getScheme())) {
            throw new URIException("relative URI with scheme only allowed for " + "scheme matching base");
        }
        return;
    }
    if (schemeSpecificPart.length() <= 2) {
        throw new URIException("http scheme specific part is " + "too short: " + schemeSpecificPart);
    }
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Fixup the domain label part of the authority.
 * /* www .  j  av  a  2s.  c o m*/
 * We're more lax than the spec. in that we allow underscores.
 * 
 * @param label Domain label to fix.
 * @return Return fixed domain label.
 * @throws URIException
 */
private String fixupDomainlabel(String label) throws URIException {

    // apply IDN-punycoding, as necessary
    try {
        // TODO: optimize: only apply when necessary, or
        // keep cache of recent encodings
        label = IDNA.toASCII(label);
    } catch (IDNAException e) {
        if (TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN, label)) {
            // domain name has ACE prefix, leading/trailing dash, or 
            // underscore -- but is still a name we wish to tolerate;
            // simply continue
        } else {
            // problematic domain: neither ASCII acceptable characters
            // nor IDN-punycodable, so throw exception 
            // TODO: change to HeritrixURIException so distinguishable
            // from URIExceptions in library code
            URIException ue = new URIException(e + " " + label);
            ue.initCause(e);
            throw ue;
        }
    }
    label = label.toLowerCase();
    return label;
}