Example usage for the org.apache.commons.httpclient.URIException constructor URIException(String)

Introduction

On this page you can find example usages of the org.apache.commons.httpclient.URIException constructor URIException(String reason).

Prototype

public URIException(String reason)

Source Link

Document

The constructor with a reason string argument.

Usage

From source file:com.limegroup.gnutella.licenses.LicenseFactory.java

/**
 * Extracts the Creative Commons license URI embedded in a license string.
 *
 * The URI is taken to be everything after the first occurrence of
 * {@code CCConstants.URL_INDICATOR}, trimmed. It is accepted only if it
 * parses, has an "http" scheme (case-insensitive) and a non-empty authority
 * that contains no space.
 *
 * @param license the license string to scan
 * @return the parsed URI, or {@code null} if the indicator is absent, nothing
 *         follows it, or the text after it is not an acceptable HTTP URI
 */
private static URI getCCLicenseURI(String license) {
    // Locate the marker that precedes the URL.
    final int indicatorPos = license.indexOf(CCConstants.URL_INDICATOR);
    if (indicatorPos == -1) {
        return null;
    }

    // Nothing after the marker means there is no URL to read.
    final int start = indicatorPos + CCConstants.URL_INDICATOR.length();
    if (start >= license.length()) {
        return null;
    }

    final String candidate = license.substring(start).trim();
    try {
        URI parsed = new URI(candidate.toCharArray());

        // Only HTTP URIs are accepted.
        String scheme = parsed.getScheme();
        if (scheme == null || !scheme.equalsIgnoreCase("http")) {
            throw new URIException("Invalid scheme: " + scheme);
        }

        // The URI must name a host: non-empty authority without spaces.
        String authority = parsed.getAuthority();
        if (authority == null || authority.equals("") || authority.indexOf(' ') != -1) {
            throw new URIException("Invalid authority: " + authority);
        }

        return parsed;
    } catch (URIException e) {
        // Parse failures and the validation failures above both end up here.
        LOG.error("Unable to create URI", e);
        return null;
    }
}

From source file:de.kapsi.net.daap.DaapRequest.java

/**
 * Sets and parses the URI. Note: if URIException is
 * thrown then is this Request in an inconsistent state!
 *
 * @param uri/*from www.j  ava  2s .  co  m*/
 * @throws URIException
 */
private void setURI(URI uri) throws URIException {

    this.uri = uri;

    if (uri != null) {

        String path = uri.getPath();

        this.queryMap = DaapUtil.parseQuery(uri.getQuery());

        if (path.equals("/server-info")) {
            requestType = SERVER_INFO;
        } else if (path.equals("/content-codes")) {
            requestType = CONTENT_CODES;
        } else if (path.equals("/login")) {
            requestType = LOGIN;
        } else if (path.equals("/logout")) {
            requestType = LOGOUT;
        } else if (path.equals("/update")) {
            requestType = UPDATE;
        } else if (path.equals("/resolve")) {
            requestType = RESOLVE;
        }

        if (queryMap.containsKey("session-id")) {
            sessionId = Integer.parseInt((String) queryMap.get("session-id"));
        }

        if (sessionId != DaapUtil.NULL) {

            if (queryMap.containsKey("revision-number")) {
                revisionNumber = Integer.parseInt((String) queryMap.get("revision-number"));
            }

            if (queryMap.containsKey("delta")) {
                delta = Integer.parseInt((String) queryMap.get("delta"));
            }

            if (queryMap.containsKey("meta")) {
                metaString = (String) queryMap.get("meta");
            }

            isUpdateType = (delta != DaapUtil.NULL) && (delta < revisionNumber);

            // "/databases/id/items"                3 tokens
            // "/databases/id/containers"           3 tokens
            // "/databases/id/items/id.format"      4 tokens
            // "/databases/id/containers/id/items"  5 tokens
            if (path.equals("/databases")) {
                requestType = DATABASES;

            } else if (path.startsWith("/databases")) {

                StringTokenizer tok = new StringTokenizer(path, "/");
                int count = tok.countTokens();

                if (count >= 3) {
                    String token = tok.nextToken();

                    if (token.equals("databases") == false) {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@1");
                    }

                    databaseId = Integer.parseInt((String) tok.nextToken());
                    token = tok.nextToken();

                    if (token.equals("items")) {
                        requestType = DATABASE_SONGS;
                    } else if (token.equals("containers")) {
                        requestType = DATABASE_PLAYLISTS;
                    } else {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@2");
                    }

                    if (count == 3) {
                        // do nothing...

                    } else if (count == 4) {

                        token = (String) tok.nextToken();

                        StringTokenizer fileTokenizer = new StringTokenizer(token, ".");

                        if (fileTokenizer.countTokens() == 2) {
                            itemId = Integer.parseInt(fileTokenizer.nextToken());
                            requestType = SONG;

                        } else {
                            throw new URIException("Unknown token in path: " + path + " [" + token + "]@3");
                        }

                    } else if (count == 5) {
                        containerId = Integer.parseInt((String) tok.nextToken());
                        token = (String) tok.nextToken();

                        if (token.equals("items")) {
                            requestType = PLAYLIST_SONGS;

                        } else {
                            throw new URIException("Unknown token in path: " + path + " [" + token + "@4");
                        }

                    } else {
                        throw new URIException("Unknown token in path: " + path + " [" + token + "]@5");
                    }
                } else {
                    throw new URIException("Unknown token in path: " + path);
                }
            }
        }

    } else {

        queryMap = null;
        metaString = null;
        isUpdateType = false;

        requestType = DaapUtil.NULL;
        databaseId = DaapUtil.NULL;
        containerId = DaapUtil.NULL;
        itemId = DaapUtil.NULL;

        sessionId = DaapUtil.NULL;
        revisionNumber = DaapUtil.NULL;
        delta = DaapUtil.NULL;
    }
}

From source file:com.cyberway.issue.net.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 *
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = &lt;undefined&gt;
 *  query     =  $7 = &lt;undefined&gt;
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 * The components are consumed left to right (scheme, authority, path,
 * query, fragment); <code>at</code> tracks the current delimiter position
 * and <code>from</code> the start of the not-yet-consumed remainder.
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException If <code>original</code> is null, or if an escaped
 * query fails validation; helper methods called here may also throw it.
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Both the first and the last character must validate against
     * delims before either is stripped.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when there is no colon past index 0 (and the text does not
     * begin with "//"), or when a slash occurs before the first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if ((atColon <= 0 && !tmp.startsWith("//")) || (atSlash >= 0 && atSlash < atColon)) {
        isStartedFromPath = true;
    }

    /*
     * Find the first delimiter; a colon only counts as one when the
     * reference is not considered to start from the path.
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        // no delimiter found: continue from the start of the string
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // step past the ':' so parsing resumes after the scheme
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with
            // later colon in other syntactical component
        }

    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    // NOTE(review): _is_opaque_part is assigned further down but is not
    // cleared here with the other flags -- confirm that is intentional.
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no terminator: the authority runs to the end of the string
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // Classify a non-absolute path as relative or opaque; which
            // check is used depends on whether the input is escaped.
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * Skipped when '?' is the last character (at + 1 < length fails).
     * <p><blockquote><pre>
     *  query     =  $7 = &lt;undefined&gt;
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            // already-escaped input is taken as-is but must validate
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:com.hipu.bdb.util.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 *
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = &lt;undefined&gt;
 *  query     =  $7 = &lt;undefined&gt;
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 * The components are consumed left to right (scheme, authority, path,
 * query, fragment); <code>at</code> tracks the current delimiter position
 * and <code>from</code> the start of the not-yet-consumed remainder.
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException If <code>original</code> is null, or if an escaped
 * query fails validation; helper methods called here may also throw it.
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Both the first and the last character must validate against
     * delims before either is stripped.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when there is no colon past index 0 (and the text does not
     * begin with "//"), or when a slash occurs before the first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if ((atColon <= 0 && !tmp.startsWith("//")) || (atSlash >= 0 && atSlash < atColon)) {
        isStartedFromPath = true;
    }

    /*
     * Find the first delimiter; a colon only counts as one when the
     * reference is not considered to start from the path.
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        // no delimiter found: continue from the start of the string
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // step past the ':' so parsing resumes after the scheme
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with
            // later colon in other syntactical component
        }
    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    // NOTE(review): _is_opaque_part is assigned further down but is not
    // cleared here with the other flags -- confirm that is intentional.
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no terminator: the authority runs to the end of the string
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // Classify a non-absolute path as relative or opaque; which
            // check is used depends on whether the input is escaped.
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * Skipped when '?' is the last character (at + 1 < length fails).
     * <p><blockquote><pre>
     *  query     =  $7 = &lt;undefined&gt;
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            // already-escaped input is taken as-is but must validate
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Sanity-checks a freshly generated UURI.
 *
 * At minimum this looks at the length of the escaped URI. A string can be
 * shorter than MAX_URL_LENGTH before escaping yet longer afterwards, and
 * letting such an over-long URI through caused trouble further down the
 * processing chain.
 *
 * @param uuri Created uuri to check.
 * @return The passed <code>uuri</code>, so the check can be used inline.
 * @throws URIException if the raw (escaped) form exceeds
 * <code>UURI.MAX_URL_LENGTH</code> characters
 */
protected UURI validityCheck(UURI uuri) throws URIException {
    final int escapedLength = uuri.getRawURI().length;
    if (escapedLength <= UURI.MAX_URL_LENGTH) {
        return uuri;
    }
    throw new URIException("Created (escaped) uuri > " + UURI.MAX_URL_LENGTH + ": " + uuri.toString());
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  It does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the
 * vanilla HttpClient URI class or official specs might suggest).
 *
 * The steps run in a fixed order: length checks, whitespace and backslash
 * normalisation, regex decomposition, scheme checks, authority and path
 * fix-ups, minimal escaping, and finally reassembly of scheme, authority,
 * path and query.  The fragment is parsed out but not reassembled.
 *
 * @param uri URI as string.  Must not be null.
 * @param base Base URI; may be null, but then <code>uri</code> must be
 * non-empty and must carry a scheme.
 * @param charset Charset handed to <code>ensureMinimalEscaping</code> for
 * the path and query.
 * @return A fixed up URI string.
 * @throws URIException if the uri is empty with no base, is longer than
 * <code>UURI.MAX_URL_LENGTH</code>, does not match the RFC2396 regex, has
 * an unsupported or ignored scheme, or is relative with no base.
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any leading/trailing spaces or new-lines.
    uri = uri.trim();

    // IE actually converts backslashes to slashes rather than to %5C.
    // Since URIs that have backslashes usually work only with IE, we will
    // convert backslashes to slashes as well.
    // TODO: Maybe we can first convert backslashes by specs and then by IE
    // so that we fetch both versions.
    if (uri.indexOf(BACKSLASH) >= 0) {
        uri = TextUtils.replaceAll(BACKSLASH_PATTERN, uri, SLASH);
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);
    if (matcher.matches()) {
        uri = matcher.group(1) + matcher.group(2);
    }

    // now, minimally escape any whitespace
    uri = escapeWhitespace(uri);

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    matcher = RFC2396REGEX.matcher(uri);
    if (!matcher.matches()) {
        throw new URIException("Failed parse of " + uri);
    }
    String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
    String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
    String uriAuthority = checkUriElement(matcher.group(5));
    String uriPath = checkUriElement(matcher.group(6));
    String uriQuery = checkUriElement(matcher.group(8));
    // UNUSED String uriFragment = checkUriElement(matcher.group(10));

    // If a scheme, is it a supported scheme?
    // NOTE(review): Arrays.binarySearch requires sorted arrays -- confirm
    // schemes and ignoredSchemes are kept sorted; ignoredSchemes is also
    // not null-checked here the way schemes is.
    if (uriScheme != null && uriScheme.length() > 0 && this.schemes != null) {
        if (!(Arrays.binarySearch(schemes, uriScheme) >= 0)) {
            // unsupported; see if silently ignored
            if ((Arrays.binarySearch(ignoredSchemes, uriScheme) >= 0)) {
                // distinct reason code lets callers tell "ignored" apart
                throw new URIException(IGNORED_SCHEME, "Ignored scheme: " + uriScheme);
            } else {
                throw new URIException("Unsupported scheme: " + uriScheme);
            }
        }
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain;
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if it's the first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        // For http/https, check the port and drop the scheme's default port.
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times.
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO: Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    // Reassemble scheme, authority, path and query with their separators.
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:dk.netarkivet.wayback.batch.copycode.NetarchiveSuiteUURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  Its does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the
 * vanilla HttpClient URI class or official specs might suggest).
 *
 * @param uri URI as string./*from   w  w w.j av  a 2 s . c  om*/
 * @param base May be null.
 * @param e True if the uri is already escaped.
 * @return A fixed up URI string.
 * @throws URIException
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    // A null uri is a programming error; an empty one is only acceptable
    // when there is a base to resolve it against.
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any leading/trailing spaces or new-lines.
    uri = uri.trim();

    // IE actually converts backslashes to slashes rather than to %5C.
    // Since URIs that have backslashes usually work only with IE, we will
    // convert backslashes to slashes as well.
    // TODO Maybe we can first convert backslashes by specs and then by IE
    // so that we fetch both versions.
    if (uri.indexOf(BACKSLASH) >= 0) {
        uri = TextUtils.replaceAll(BACKSLASH_PATTERN, uri, SLASH);
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);
    if (matcher.matches()) {
        uri = matcher.group(1) + matcher.group(2);
    }

    // now, minimally escape any whitespace
    uri = escapeWhitespace(uri);

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    matcher = RFC2396REGEX.matcher(uri);
    if (!matcher.matches()) {
        throw new URIException("Failed parse of " + uri);
    }
    String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
    String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
    String uriAuthority = checkUriElement(matcher.group(5));
    String uriPath = checkUriElement(matcher.group(6));
    String uriQuery = checkUriElement(matcher.group(8));
    // The fragment is never read, so it is not part of the returned string.
    // UNUSED String uriFragment = checkUriElement(matcher.group(10));

    // If a scheme, is it a supported scheme?
    // NOTE(review): Arrays.binarySearch only gives defined results on
    // sorted arrays -- confirm schemes/ignoredSchemes are sorted where
    // they are populated.  Also, ignoredSchemes is not null-checked here
    // (schemes is) -- confirm it is always set when schemes is.
    if (uriScheme != null && uriScheme.length() > 0 && this.schemes != null) {
        if (!(Arrays.binarySearch(schemes, uriScheme) >= 0)) {
            // unsupported; see if silently ignored
            if ((Arrays.binarySearch(ignoredSchemes, uriScheme) >= 0)) {
                throw new URIException(IGNORED_SCHEME, "Ignored scheme: " + uriScheme);
            } else {
                throw new URIException("Unsupported scheme: " + uriScheme);
            }
        }
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain;
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        // For http/https, validate the port and then strip the tail given
        // by HTTP_PORT / HTTPS_PORT from the authority.
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times.
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    // Reassemble from scheme, authority, path and query only.
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:davmail.exchange.ExchangeSession.java

/**
 * Builds an absolute URI string for {@code path}, using the URI of
 * {@code method} as the starting point.
 * <ul>
 * <li>{@code null} path: the method URI is returned unchanged.</li>
 * <li>Path starting with "/": replaces the path of the method URI.</li>
 * <li>Path starting with "http://" or "https://": returned as is.</li>
 * <li>Anything else: appended after the last '/' of the method path.</li>
 * </ul>
 * For every non-null path the query string of the method URI is cleared.
 * NOTE(review): the object returned by {@code method.getURI()} is modified
 * in place; confirm it is not shared with the method itself.
 *
 * @param method method whose URI and path are the resolution base
 * @param path absolute path, full http(s) URL, relative path, or null
 * @return the resulting URI as a string
 * @throws URIException if the current method path contains no '/' to
 * resolve a relative path against, or if the URI cannot be updated
 */
protected String getAbsoluteUri(HttpMethod method, String path) throws URIException {
    final URI target = method.getURI();
    if (path == null) {
        return target.getURI();
    }

    // reset query string
    target.setQuery(null);

    if (path.startsWith("/")) {
        // path is absolute, replace method path
        target.setPath(path);
        return target.getURI();
    }

    if (path.startsWith("http://") || path.startsWith("https://")) {
        // already a full URL
        return path;
    }

    // relative path, build new path from the directory of the current one
    final String currentPath = method.getPath();
    final int lastSlash = currentPath.lastIndexOf('/');
    if (lastSlash < 0) {
        throw new URIException(target.getURI());
    }
    target.setPath(currentPath.substring(0, lastSlash + 1) + path);
    return target.getURI();
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * If http(s) scheme, check scheme specific part begins '//'.
 * @throws URIException /*from  w  ww .ja v a 2s .co m*/
 * @see http://www.faqs.org/rfcs/rfc1738.html Section 3.1. Common Internet
 * Scheme Syntax
 */
protected void checkHttpSchemeSpecificPartSlashPrefix(final URI base, final String scheme,
        final String schemeSpecificPart) throws URIException {
    if (scheme == null || scheme.length() <= 0) {
        return;
    }
    if (!scheme.equals("http") && !scheme.equals("https")) {
        return;
    }
    if (schemeSpecificPart == null || !schemeSpecificPart.startsWith("//")) {
        // only acceptable if schemes match
        if (base == null || !scheme.equals(base.getScheme())) {
            throw new URIException("relative URI with scheme only allowed for " + "scheme matching base");
        }
        return;
    }
    if (schemeSpecificPart.length() <= 2) {
        throw new URIException("http scheme specific part is " + "too short: " + schemeSpecificPart);
    }
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Fixup the domain label part of the authority.
 * /* www .  j  av  a  2s.  c o m*/
 * We're more lax than the spec. in that we allow underscores.
 * 
 * @param label Domain label to fix.
 * @return Return fixed domain label.
 * @throws URIException
 */
private String fixupDomainlabel(String label) throws URIException {

    // apply IDN-punycoding, as necessary
    try {
        // TODO: optimize: only apply when necessary, or
        // keep cache of recent encodings
        label = IDNA.toASCII(label);
    } catch (IDNAException e) {
        if (TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN, label)) {
            // domain name has ACE prefix, leading/trailing dash, or 
            // underscore -- but is still a name we wish to tolerate;
            // simply continue
        } else {
            // problematic domain: neither ASCII acceptable characters
            // nor IDN-punycodable, so throw exception 
            // TODO: change to HeritrixURIException so distinguishable
            // from URIExceptions in library code
            URIException ue = new URIException(e + " " + label);
            ue.initCause(e);
            throw ue;
        }
    }
    label = label.toLowerCase();
    return label;
}