Example usage for the org.apache.commons.httpclient.URIException constructor URIException(String)

Introduction

On this page you can find example usages of the org.apache.commons.httpclient.URIException constructor URIException(String reason).

Prototype

public URIException(String reason)

Source Link

Document

The constructor with a reason string argument.

Usage

From source file:dk.netarkivet.wayback.batch.copycode.NetarchiveSuiteUURIFactory.java

/**
 * Fixup the domain label part of the authority.
 *
 * We're more lax than the spec. in that we allow underscores.
 *
 * @param label Domain label to fix.
 * @return The fixed domain label: IDN-punycoded where possible and
 * lower-cased independently of the JVM's default locale.
 * @throws URIException if the label cannot be IDN-punycoded and does not
 * match ACCEPTABLE_ASCII_DOMAIN either.
 */
private String fixupDomainlabel(String label) throws URIException {

    // apply IDN-punycoding, as necessary
    try {
        // TODO optimize: only apply when necessary, or
        // keep cache of recent encodings
        label = IDNA.toASCII(label);
    } catch (IDNAException e) {
        if (!TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN, label)) {
            // problematic domain: neither ASCII acceptable characters
            // nor IDN-punycodable, so throw exception
            // TODO change to HeritrixURIException so distinguishable
            // from URIExceptions in library code
            URIException ue = new URIException(e + " " + label);
            ue.initCause(e);
            throw ue;
        }
        // otherwise: domain name has ACE prefix, leading/trailing dash, or
        // underscore -- but is still a name we wish to tolerate;
        // simply continue with the label as passed in
    }
    // Lower-case with a fixed locale: the no-arg toLowerCase() uses the
    // default locale, which under e.g. a Turkish locale maps 'I' to a
    // dotless 'i' and would corrupt otherwise-ASCII host names.
    label = label.toLowerCase(java.util.Locale.ROOT);
    return label;
}

From source file:com.cyberway.issue.net.UURIFactory.java

/**
 * Check port on passed http authority.  Make sure the size is not larger
 * than allowed: See the 'port' definition on this
 * page, http://www.kerio.com/manual/wrp/en/418.htm.
 * Also, we've seen port numbers of '0080' whose leading zeros confuse
 * the parent class. Strip the leading zeros.
 *
 * @param uriAuthority/*from w w  w.j av  a 2 s .  c  o m*/
 * @return Null or an amended port number.
 * @throws URIException
 */
private String checkPort(String uriAuthority) throws URIException {
    Matcher m = PORTREGEX.matcher(uriAuthority);
    if (m.matches()) {
        String no = m.group(2);
        if (no != null && no.length() > 0) {
            // First check if the port has leading zeros
            // as in '0080'.  Strip them if it has and
            // then reconstitute the uriAuthority.  Be careful
            // of cases where port is '0' or '000'.
            while (no.charAt(0) == '0' && no.length() > 1) {
                no = no.substring(1);
            }
            uriAuthority = m.group(1) + no;
            // Now makesure the number is legit.
            int portNo = 0;
            try {
                portNo = Integer.parseInt(no);
            } catch (NumberFormatException nfe) {
                // just catch and leave portNo at illegal 0
            }
            if (portNo <= 0 || portNo > 65535) {
                throw new URIException("Port out of bounds: " + uriAuthority);
            }
        }
    }
    return uriAuthority;
}

From source file:org.apache.webdav.ui.WebdavSystemView.java

/**
 * Build the URL object matching the scheme prefix of the passed string.
 *
 * @param uri URI string; expected to start with "http://" or "https://".
 * @return An HttpURL for "http://" strings, an HttpsURL for "https://" strings.
 * @throws URIException if the string starts with neither prefix.
 */
private static HttpURL uriToHttpURL(String uri) throws URIException {
    if (uri.startsWith("http://")) {
        return new HttpURL(uri);
    }
    if (uri.startsWith("https://")) {
        return new HttpsURL(uri);
    }
    throw new URIException("Unknown protocol in URL " + uri);
}

From source file:org.archive.url.LaxURI.java

/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR 
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 * 
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = <undefined>
 *  query     =  $7 = <undefined>
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException If <code>original</code> is null, or if an escaped
 * query fails validation; helper methods called here may also throw.
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Both the first and the last character must validate against
     * 'delims' before anything is stripped.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * Set when the string does not begin with "//" and either has no colon
     * after index 0 or has a slash before its first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if (!tmp.startsWith("//") && (atColon <= 0 || (atSlash >= 0 && atSlash < atColon))) {
        isStartedFromPath = true;
    }

    /*
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        // no delimiter found; continue from the start of the string
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        // the candidate scheme is lower-cased before validation
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // step past the ':' so parsing continues after the scheme
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with 
            // later colon in other syntactical component
        }
    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    // NOTE(review): _is_opaque_part is set further down but is not part of
    // this reset -- confirm whether it is cleared elsewhere.
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no terminator after "//": the authority runs to the end
                // of the string (or is empty when nothing follows "//")
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // Unescaped input is checked with prevalidate(...) against the
            // 'disallowed_*' sets; escaped input is checked with
            // validate(...) against the rel_path / opaque_part sets.
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * <p><blockquote><pre>
     *  query     =  $7 = <undefined>
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Only entered when at least one character follows the '?'.
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            // escaped input is taken verbatim; unescaped input is encoded
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Check the generated UURI./*from  w w  w.j  av a  2  s  . c  o m*/
 * 
 * At the least look at length of uuri string.  We were seeing case
 * where before escaping, string was &lt; MAX_URL_LENGTH but after was
 * &gt;.  Letting out a too-big message was causing us troubles later
 * down the processing chain.
 * @param uuri Created uuri to check.
 * @return The passed <code>uuri</code> so can easily inline this check.
 * @throws URIException
 */
protected UsableURI validityCheck(UsableURI uuri) throws URIException {
    if (uuri.getRawURI().length > UsableURI.MAX_URL_LENGTH) {
        throw new URIException("Created (escaped) uuri > " + UsableURI.MAX_URL_LENGTH + ": " + uuri.toString());
    }
    return uuri;
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs.  This method codifies our experience pulling URIs from the
 * wilds.  It does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the 
 * vanilla HttpClient URI class or official specs might suggest). 
 *
 * @param uri URI as string. Must not be null.
 * @param base May be null. Needed when <code>uri</code> is relative.
 * @param charset Charset handed on to the authority fix-up and the
 * minimal-escaping steps.
 * @return A fixed up URI string.
 * @throws NullPointerException if <code>uri</code> is null.
 * @throws URIException if the uri is empty with no base, is longer than
 * <code>UsableURI.MAX_URL_LENGTH</code>, fails to parse, is relative with
 * no base, or is rejected by one of the element checks.
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UsableURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UsableURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any trailing spaces or new-lines. 
    uri = uri.trim();

    // IE converts backslashes preceding the query string to slashes, rather
    // than to %5C. Since URIs that have backslashes usually work only with
    // IE, we will convert backslashes to slashes as well.
    int nextBackslash = uri.indexOf(BACKSLASH);
    if (nextBackslash >= 0) {
        int queryStart = uri.indexOf('?');
        StringBuilder tmp = new StringBuilder(uri);
        while (nextBackslash >= 0 && (queryStart < 0 || nextBackslash < queryStart)) {
            tmp.setCharAt(nextBackslash, '/');
            nextBackslash = uri.indexOf(BACKSLASH, nextBackslash + 1);
        }
        uri = tmp.toString();
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    Matcher matcher = TextUtils.getMatcher(HTTP_SCHEME_SLASHES.pattern(), uri);
    try {
        if (matcher.matches()) {
            uri = matcher.group(1) + matcher.group(2);
        }
    } finally {
        TextUtils.recycleMatcher(matcher);
    }

    // For further processing, get uri elements.  See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    String uriScheme;
    String uriSchemeSpecificPart;
    String uriAuthority;
    String uriPath;
    String uriQuery;
    matcher = TextUtils.getMatcher(RFC2396REGEX.pattern(), uri);
    try {
        if (!matcher.matches()) {
            throw new URIException("Failed parse of " + uri);
        }
        uriScheme = checkUriElementAndLowerCase(matcher.group(2));
        uriSchemeSpecificPart = checkUriElement(matcher.group(3));
        uriAuthority = checkUriElement(matcher.group(5));
        uriPath = checkUriElement(matcher.group(6));
        uriQuery = checkUriElement(matcher.group(8));
        // UNUSED String uriFragment = checkUriElement(matcher.group(10));
    } finally {
        // Hand the matcher back on the failure paths too; previously a
        // parse failure (or a throwing element check) skipped the recycle.
        TextUtils.recycleMatcher(matcher);
    }

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain; 
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority, charset);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path.  IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec 
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times. 
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding.  In this case the parent
    // class is burping on the passed URL encoding.  If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content.  There is the charset case
    // noted above.  TODO: Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.

    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate.  The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':' 
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}

From source file:org.archive.url.UsableURIFactory.java

/**
 * Check port on passed http authority.  Make sure the size is not larger
 * than allowed: See the 'port' definition on this
 * page, http://www.kerio.com/manual/wrp/en/418.htm.
 * Also, we've seen port numbers of '0080' whose leading zeros confuse
 * the parent class. Strip the leading zeros.
 *
 * @param uriAuthority/*from  w ww.j a  va2  s  .  c om*/
 * @return Null or an amended port number.
 * @throws URIException
 */
private String checkPort(String uriAuthority) throws URIException {
    //        Matcher m = PORTREGEX.matcher(uriAuthority);
    Matcher m = TextUtils.getMatcher(PORTREGEX.pattern(), uriAuthority);
    if (m.matches()) {
        String no = m.group(2);
        if (no != null && no.length() > 0) {
            // First check if the port has leading zeros
            // as in '0080'.  Strip them if it has and
            // then reconstitute the uriAuthority.  Be careful
            // of cases where port is '0' or '000'.
            while (no.charAt(0) == '0' && no.length() > 1) {
                no = no.substring(1);
            }
            uriAuthority = m.group(1) + no;
            // Now makesure the number is legit.
            int portNo = 0;
            try {
                portNo = Integer.parseInt(no);
            } catch (NumberFormatException nfe) {
                // just catch and leave portNo at illegal 0
            }
            if (portNo <= 0 || portNo > 65535) {
                throw new URIException("Port out of bounds: " + uriAuthority);
            }
        }
    }
    TextUtils.recycleMatcher(m);
    return uriAuthority;
}

From source file:org.archive.wayback.util.url.KeyMakerUrlCanonicalizer.java

/**
 * Turn a URL string into its key by delegating to <code>keyMaker</code>.
 *
 * @param url URL string to convert.
 * @return The key produced by <code>keyMaker.makeKey(url)</code>.
 * @throws URIException carrying the message of the URISyntaxException
 * raised by the key maker.
 */
public String urlStringToKey(String url) throws URIException {
    final String key;
    try {
        key = keyMaker.makeKey(url);
    } catch (URISyntaxException syntaxProblem) {
        // translate to the exception type this method declares
        throw new URIException(syntaxProblem.getMessage());
    }
    return key;
}

From source file:org.parosproxy.paros.core.scanner.Analyser.java

/**
 * Build a regular-expression string for the passed URI: the URI without its
 * query, with every '.' escaped, followed by the original query as an
 * optional group.
 *
 * @param uri URI to build the pattern from; it is cloned, not modified.
 * @return The pattern string.
 * @throws URIException if the URI cannot be cloned or its query cannot be
 * read or cleared.
 */
private String getPathRegex(URI uri) throws URIException {
    URI newUri;
    // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient
    try {
        newUri = (URI) uri.clone();

    } catch (CloneNotSupportedException e) {
        throw new URIException(e.getMessage());
    }

    String query = newUri.getQuery();
    StringBuilder sb = new StringBuilder(100);

    // case should be sensitive
    //sb.append("(?i)");
    newUri.setQuery(null);

    // Escape literal dots so they do not act as regex wildcards.  The
    // previous replaceAll("\\.", "\\.") was a no-op: in a regex replacement
    // string a backslash only escapes the following character, so each '.'
    // was replaced by a plain '.'.
    sb.append(newUri.toString().replace(".", "\\."));
    if (query != null) {
        // NOTE(review): the query is inserted unescaped, so regex
        // metacharacters in it are interpreted as pattern syntax -- confirm
        // whether that is intended before changing it.
        String queryPattern = "(\\?" + query + ")?";
        sb.append(queryPattern);
    }

    return sb.toString();
}

From source file:org.parosproxy.paros.extension.filter.FilterLogCookie.java

/**
 * Log each distinct "Cookie" request header once: the first time a cookie
 * value is seen it is added to <code>cookieList</code> and written to the
 * output panel together with the request URI stripped of its query.
 *
 * @param msg Message whose request header is inspected.
 */
@Override
public void onHttpRequestSend(HttpMessage msg) {
    HttpRequestHeader requestHeader = msg.getRequestHeader();
    if (requestHeader == null) {
        return;
    }

    String cookie = requestHeader.getHeader("Cookie");
    synchronized (cookieList) {
        // nothing to do without a cookie, or for one that is already listed
        if (cookie == null || cookieList.indexOf(cookie) != -1) {
            return;
        }

        try {
            // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient
            URI queryLessUri;
            try {
                queryLessUri = (URI) requestHeader.getURI().clone();
            } catch (CloneNotSupportedException cloneFailure) {
                throw new URIException(cloneFailure.getMessage());
            }
            queryLessUri.setQuery(null);
            String sUri = queryLessUri.toString();

            // only remember the cookie once the URI has been produced
            cookieList.add(cookie);
            getView().getOutputPanel().append(sUri + DELIM + cookie + "\n");
        } catch (URIException uriFailure) {
            // ZAP: Print stack trace to Output tab
            getView().getOutputPanel().append(uriFailure);
        }
    }
}