Example usage for org.apache.commons.httpclient URIException URIException

List of usage examples for org.apache.commons.httpclient URIException URIException

Introduction

On this page you can find example usages of the org.apache.commons.httpclient.URIException constructor URIException(String reason).

Prototype

public URIException(String reason) 

Source Link

Document

The constructor with a reason string argument.

Usage

From source file:dk.netarkivet.wayback.batch.copycode.NetarchiveSuiteUURIFactory.java

/**
 * Fix up the domain label part of the authority.
 *
 * The label is first run through {@code IDNA.toASCII}. If that fails, the
 * label is still accepted when it matches {@code ACCEPTABLE_ASCII_DOMAIN}
 * (we are more lax than the spec. in that we allow underscores); otherwise
 * a {@link URIException} carrying the IDNA failure is thrown. The resulting
 * label is lower-cased with a fixed locale so the outcome does not depend
 * on the JVM's default locale (e.g. a Turkish default would otherwise map
 * 'I' to a dotless 'i').
 *
 * @param label Domain label to fix.
 * @return Return fixed domain label.
 * @throws URIException if the label is neither IDN-punycodable nor an
 *         acceptable ASCII domain label
 */
private String fixupDomainlabel(String label) throws URIException {

    // apply IDN-punycoding, as necessary
    try {
        // TODO optimize: only apply when necessary, or
        // keep cache of recent encodings
        label = IDNA.toASCII(label);
    } catch (IDNAException e) {
        if (!TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN, label)) {
            // problematic domain: neither ASCII acceptable characters
            // nor IDN-punycodable, so throw exception
            // TODO change to HeritrixURIException so distinguishable
            // from URIExceptions in library code
            URIException ue = new URIException(e + " " + label);
            ue.initCause(e);
            throw ue;
        }
        // otherwise: domain name has ACE prefix, leading/trailing dash, or
        // underscore -- but is still a name we wish to tolerate;
        // simply continue with the label unchanged
    }
    // Locale-independent lower-casing; fully qualified to avoid needing
    // a new import in the enclosing file.
    return label.toLowerCase(java.util.Locale.ENGLISH);
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Check port on passed http authority.  Make sure the size is not larger
 * than allowed: See the 'port' definition on this
 * page, http://www.kerio.com/manual/wrp/en/418.htm.
 * Also, we've seen port numbers of '0080' whose leading zeros confuse
 * the parent class. Strip the leading zeros.
 *
 * @param uriAuthority/*from w w  w.j av  a 2 s .  c  o m*/
 * @return Null or an amended port number.
 * @throws URIException
 */
private String checkPort(String uriAuthority) throws URIException {
    Matcher m = PORTREGEX.matcher(uriAuthority);
    if (m.matches()) {
        String no = m.group(2);
        if (no != null && no.length() > 0) {
            // First check if the port has leading zeros
            // as in '0080'.  Strip them if it has and
            // then reconstitute the uriAuthority.  Be careful
            // of cases where port is '0' or '000'.
            while (no.charAt(0) == '0' && no.length() > 1) {
                no = no.substring(1);
            }
            uriAuthority = m.group(1) + no;
            // Now makesure the number is legit.
            int portNo = 0;
            try {
                portNo = Integer.parseInt(no);
            } catch (NumberFormatException nfe) {
                // just catch and leave portNo at illegal 0
            }
            if (portNo <= 0 || portNo > 65535) {
                throw new URIException("Port out of bounds: " + uriAuthority);
            }
        }
    }
    return uriAuthority;
}

From source file:org.apache.webdav.ui.WebdavSystemView.java

/**
 * Build an {@code HttpURL} (or {@code HttpsURL}) from a URI string,
 * choosing the class by the string's scheme prefix.
 *
 * @param uri URI string beginning with "http://" or "https://"
 * @return an {@code HttpURL} for "http://", an {@code HttpsURL} for "https://"
 * @throws URIException if the string starts with neither prefix
 */
private static HttpURL uriToHttpURL(String uri) throws URIException {
    if (uri.startsWith("http://")) {
        return new HttpURL(uri);
    }
    if (uri.startsWith("https://")) {
        return new HttpsURL(uri);
    }
    throw new URIException("Unknown protocol in URL " + uri);
}

From source file:org.archive.url.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR 
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 * 
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The reference is trimmed, a surrounding pair of delimiter characters is
 * removed, and then scheme, authority, path, query and fragment are
 * consumed left to right, each step advancing the shared <code>at</code> /
 * <code>from</code> indices that the following step relies on.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = <undefined>
 *  query     =  $7 = <undefined>
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException If an error occurs (e.g. <code>original</code> is
 *         null, or an escaped query fails validation).
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Only done when BOTH the first and the last character validate
     * against delims; a lone leading delimiter is left in place.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when the reference does not begin with "//" and either has no
     * colon after index 0 or has a slash before its first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if (!tmp.startsWith("//") && (atColon <= 0 || (atSlash >= 0 && atSlash < atColon))) {
        isStartedFromPath = true;
    }

    /*
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     *
     * Locate the first component delimiter; ':' is only considered when
     * the reference is not path-first.
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        // no delimiter found: continue from the start of the string
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        // NOTE(review): toLowerCase() here uses the JVM default locale --
        // confirm whether a fixed locale is wanted for scheme names.
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // step past the ':' so both indices point at what follows it
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with 
            // later colon in other syntactical component
        }
    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    // NOTE(review): _is_opaque_part is set further down but is not among
    // the flags reset here -- confirm that is intended.
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // authority runs to the end of the string
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // Unescaped input goes through prevalidate() against the
            // disallowed_* sets; escaped input goes through validate()
            // against the rel_path / opaque_part sets.
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        // continue with whatever follows the path ('?', '#', or end)
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * <p><blockquote><pre>
     *  query     =  $7 = <undefined>
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     *
     * Note the 'at + 1 < length' guard: a '?' that is the very last
     * character does not enter this branch, so _query is not assigned.
     */
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     *
     * Unlike the query, a trailing '#' IS handled here and yields an
     * empty (zero-length) fragment.
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Check the generated UURI./*from  w w  w.j  av a  2  s  . c  o m*/
 * 
 * At the least look at length of uuri string.  We were seeing case
 * where before escaping, string was &lt; MAX_URL_LENGTH but after was
 * &gt;.  Letting out a too-big message was causing us troubles later
 * down the processing chain.
 * @param uuri Created uuri to check.
 * @return The passed <code>uuri</code> so can easily inline this check.
 * @throws URIException
 */
protected UsableURI validityCheck(UsableURI uuri) throws URIException {
    if (uuri.getRawURI().length > UsableURI.MAX_URL_LENGTH) {
        throw new URIException("Created (escaped) uuri > " + UsableURI.MAX_URL_LENGTH + ": " + uuri.toString());
    }
    return uuri;
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  It does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the 
 * vanilla HttpClient URI class or official specs might suggest). 
 *
 * @param uri URI as string. Must not be null.
 * @param base May be null.
 * @param charset Character set name passed through to the authority
 *        fix-up and to the minimal-escaping helpers.
 * @return A fixed up URI string.
 * @throws URIException if the URI is empty with no base, too long,
 *         unparsable, relative with no base, or fails one of the
 *         element/port checks.
 * @throws NullPointerException if <code>uri</code> is null.
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UsableURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UsableURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any trailing spaces or new-lines. 
    uri = uri.trim();

    // IE converts backslashes preceding the query string to slashes, rather
    // than to %5C. Since URIs that have backslashes usually work only with
    // IE, we will convert backslashes to slashes as well.
    int nextBackslash = uri.indexOf(BACKSLASH);
    if (nextBackslash >= 0) {
        int queryStart = uri.indexOf('?');
        StringBuilder tmp = new StringBuilder(uri);
        while (nextBackslash >= 0 && (queryStart < 0 || nextBackslash < queryStart)) {
            tmp.setCharAt(nextBackslash, '/');
            nextBackslash = uri.indexOf(BACKSLASH, nextBackslash + 1);
        }
        uri = tmp.toString();
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = TextUtils.getMatcher(HTTP_SCHEME_SLASHES.pattern(), uri);
    if (matcher.matches()) {
        uri = matcher.group(1) + matcher.group(2);
    }
    TextUtils.recycleMatcher(matcher);

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    matcher = TextUtils.getMatcher(RFC2396REGEX.pattern(), uri);
    String uriScheme;
    String uriSchemeSpecificPart;
    String uriAuthority;
    String uriPath;
    String uriQuery;
    try {
        if (!matcher.matches()) {
            throw new URIException("Failed parse of " + uri);
        }
        uriScheme = checkUriElementAndLowerCase(matcher.group(2));
        uriSchemeSpecificPart = checkUriElement(matcher.group(3));
        uriAuthority = checkUriElement(matcher.group(5));
        uriPath = checkUriElement(matcher.group(6));
        uriQuery = checkUriElement(matcher.group(8));
        // UNUSED String uriFragment = checkUriElement(matcher.group(10));
    } finally {
        // Hand the matcher back on every exit path; previously it was
        // only recycled when parsing and all element checks succeeded.
        TextUtils.recycleMatcher(matcher);
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain; 
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority, charset);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec 
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times. 
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO: Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Check port on passed http authority.  Make sure the size is not larger
 * than allowed: See the 'port' definition on this
 * page, http://www.kerio.com/manual/wrp/en/418.htm.
 * Also, we've seen port numbers of '0080' whose leading zeros confuse
 * the parent class. Strip the leading zeros.
 *
 * @param uriAuthority/*from  w ww.j a  va2  s  .  c om*/
 * @return Null or an amended port number.
 * @throws URIException
 */
private String checkPort(String uriAuthority) throws URIException {
    //        Matcher m = PORTREGEX.matcher(uriAuthority);
    Matcher m = TextUtils.getMatcher(PORTREGEX.pattern(), uriAuthority);
    if (m.matches()) {
        String no = m.group(2);
        if (no != null && no.length() > 0) {
            // First check if the port has leading zeros
            // as in '0080'.  Strip them if it has and
            // then reconstitute the uriAuthority.  Be careful
            // of cases where port is '0' or '000'.
            while (no.charAt(0) == '0' && no.length() > 1) {
                no = no.substring(1);
            }
            uriAuthority = m.group(1) + no;
            // Now makesure the number is legit.
            int portNo = 0;
            try {
                portNo = Integer.parseInt(no);
            } catch (NumberFormatException nfe) {
                // just catch and leave portNo at illegal 0
            }
            if (portNo <= 0 || portNo > 65535) {
                throw new URIException("Port out of bounds: " + uriAuthority);
            }
        }
    }
    TextUtils.recycleMatcher(m);
    return uriAuthority;
}

From source file:org.archive.wayback.util.url.KeyMakerUrlCanonicalizer.java

/**
 * Convert a URL string to its key by delegating to {@code keyMaker}.
 *
 * @param url URL string to convert
 * @return the key produced by {@code keyMaker.makeKey(url)}
 * @throws URIException carrying the message of the
 *         {@code URISyntaxException} raised by the key maker
 */
public String urlStringToKey(String url) throws URIException {
    final String key;
    try {
        key = keyMaker.makeKey(url);
    } catch (URISyntaxException syntaxProblem) {
        // translate to the exception type declared by this method
        throw new URIException(syntaxProblem.getMessage());
    }
    return key;
}

From source file:org.parosproxy.paros.core.scanner.Analyser.java

/**
 * Build a regular-expression string that matches the given URI with its
 * query string made optional.
 *
 * The URI is cloned, its query removed, and every '.' in the remaining
 * text is escaped as "\." so that it matches a literal dot. When the
 * original URI had a query, "(\?" + query + ")?" is appended.
 *
 * @param uri URI to build the pattern from; it is not modified
 * @return the regular-expression source text
 * @throws URIException if the URI cannot be cloned or manipulated
 */
private String getPathRegex(URI uri) throws URIException {
    URI newUri;
    // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient
    try {
        newUri = (URI) uri.clone();
    } catch (CloneNotSupportedException e) {
        throw new URIException(e.getMessage());
    }

    String query = newUri.getQuery();
    StringBuilder sb = new StringBuilder(100);

    // Matching is deliberately case-sensitive, so no "(?i)" prefix.
    newUri.setQuery(null);

    // Escape dots with a LITERAL replace. The former
    // replaceAll("\\.", "\\.") was a no-op: in a regex replacement string
    // a backslash merely escapes the following character, so each '.' was
    // replaced by a plain '.'.
    sb.append(newUri.toString().replace(".", "\\."));
    if (query != null) {
        // NOTE(review): the query (and other regex metacharacters in the
        // URI) are still inserted unescaped, as before -- confirm whether
        // they should be quoted as well.
        sb.append("(\\?").append(query).append(")?");
    }

    return sb.toString();
}

From source file:org.parosproxy.paros.extension.filter.FilterLogCookie.java

/**
 * Log each distinct "Cookie" request header once.
 *
 * For a cookie value not yet present in {@code cookieList}, the request
 * URI (with its query removed) and the cookie are appended to the output
 * panel, and the cookie is remembered. The lookup and the insert happen
 * under the {@code cookieList} monitor.
 *
 * @param msg the HTTP message about to be sent
 */
@Override
public void onHttpRequestSend(HttpMessage msg) {
    HttpRequestHeader header = msg.getRequestHeader();
    if (header == null) {
        return;
    }

    String cookie = header.getHeader("Cookie");
    synchronized (cookieList) {
        boolean alreadyLogged = cookie != null && cookieList.indexOf(cookie) != -1;
        if (cookie == null || alreadyLogged) {
            return;
        }

        try {
            // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient
            URI queryLessUri;
            try {
                queryLessUri = (URI) header.getURI().clone();
            } catch (CloneNotSupportedException cloneFailure) {
                throw new URIException(cloneFailure.getMessage());
            }
            queryLessUri.setQuery(null);
            String uriText = queryLessUri.toString();
            cookieList.add(cookie);
            getView().getOutputPanel().append(uriText + DELIM + cookie + "\n");
        } catch (URIException e) {
            // ZAP: Print stack trace to Output tab
            getView().getOutputPanel().append(e);
        }
    }
}