List of usage examples for org.apache.commons.httpclient URIException URIException
public URIException(String reason)
From source file:dk.netarkivet.wayback.batch.copycode.NetarchiveSuiteUURIFactory.java
/** * Fixup the domain label part of the authority. * * We're more lax than the spec. in that we allow underscores. * * @param label Domain label to fix.//from ww w. j ava 2s. com * @return Return fixed domain label. * @throws URIException */ private String fixupDomainlabel(String label) throws URIException { // apply IDN-punycoding, as necessary try { // TODO optimize: only apply when necessary, or // keep cache of recent encodings label = IDNA.toASCII(label); } catch (IDNAException e) { if (TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN, label)) { // domain name has ACE prefix, leading/trailing dash, or // underscore -- but is still a name we wish to tolerate; // simply continue } else { // problematic domain: neither ASCII acceptable characters // nor IDN-punycodable, so throw exception // TODO change to HeritrixURIException so distinguishable // from URIExceptions in library code URIException ue = new URIException(e + " " + label); ue.initCause(e); throw ue; } } label = label.toLowerCase(); return label; }
From source file:com.cyberway.issue.net.UURIFactory.java
/** * Check port on passed http authority. Make sure the size is not larger * than allowed: See the 'port' definition on this * page, http://www.kerio.com/manual/wrp/en/418.htm. * Also, we've seen port numbers of '0080' whose leading zeros confuse * the parent class. Strip the leading zeros. * * @param uriAuthority/*from w w w.j av a 2 s . c o m*/ * @return Null or an amended port number. * @throws URIException */ private String checkPort(String uriAuthority) throws URIException { Matcher m = PORTREGEX.matcher(uriAuthority); if (m.matches()) { String no = m.group(2); if (no != null && no.length() > 0) { // First check if the port has leading zeros // as in '0080'. Strip them if it has and // then reconstitute the uriAuthority. Be careful // of cases where port is '0' or '000'. while (no.charAt(0) == '0' && no.length() > 1) { no = no.substring(1); } uriAuthority = m.group(1) + no; // Now makesure the number is legit. int portNo = 0; try { portNo = Integer.parseInt(no); } catch (NumberFormatException nfe) { // just catch and leave portNo at illegal 0 } if (portNo <= 0 || portNo > 65535) { throw new URIException("Port out of bounds: " + uriAuthority); } } } return uriAuthority; }
From source file:org.apache.webdav.ui.WebdavSystemView.java
/**
 * Build an HttpURL (or HttpsURL) from a URI string, chosen by its prefix.
 *
 * @param uri URI string; must start with "http://" or "https://"
 * @return an HttpURL for "http://" input, an HttpsURL for "https://" input
 * @throws URIException if the string starts with neither prefix, or if the
 *         chosen URL constructor rejects it
 */
private static HttpURL uriToHttpURL(String uri) throws URIException {
    if (uri.startsWith("http://")) {
        return new HttpURL(uri);
    }
    if (uri.startsWith("https://")) {
        return new HttpsURL(uri);
    }
    throw new URIException("Unknown protocol in URL " + uri);
}
From source file:org.archive.url.LaxURI.java
/**
 * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR
 * http://issues.apache.org/jira/browse/HTTPCLIENT-588
 * AND
 * http://webteam.archive.org/jira/browse/HER-1268
 *
 * In order to avoid any possibility of conflict with non-ASCII characters,
 * parse a URI reference as a <code>String</code> with the character
 * encoding of the local system or the document.
 * <p>
 * The following line is the regular expression for breaking-down a URI
 * reference into its components.
 * <p><blockquote><pre>
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *    12            3  4          5       6  7        8 9
 * </pre></blockquote><p>
 * For example, matching the above expression to
 *   http://jakarta.apache.org/ietf/uri/#Related
 * results in the following subexpression matches:
 * <p><blockquote><pre>
 *               $1 = http:
 *  scheme    =  $2 = http
 *               $3 = //jakarta.apache.org
 *  authority =  $4 = jakarta.apache.org
 *  path      =  $5 = /ietf/uri/
 *               $6 = <undefined>
 *  query     =  $7 = <undefined>
 *               $8 = #Related
 *  fragment  =  $9 = Related
 * </pre></blockquote><p>
 *
 * The components are consumed strictly left to right (scheme, authority,
 * path, query, fragment); the local indices <code>from</code> and
 * <code>at</code> carry the parse position from one stage to the next, so
 * the order of the stages below matters.
 *
 * @param original the original character sequence
 * @param escaped <code>true</code> if <code>original</code> is escaped
 * @throws URIException If <code>original</code> is null, if an escaped
 *         query fails validation, or if a delegated parse/encode step fails.
 */
protected void parseUriReference(String original, boolean escaped) throws URIException {

    // validate and construct the URI character sequence
    if (original == null) {
        throw new URIException("URI-Reference required");
    }

    /* @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     */
    String tmp = original.trim();

    /*
     * The length of the string sequence of characters.
     * It may not be equal to the length of the byte array.
     */
    int length = tmp.length();

    /*
     * Remove the delimiters like angle brackets around an URI.
     * Exactly one leading and one trailing character are dropped, and only
     * when BOTH the first and the last character validate against delims.
     */
    if (length > 0) {
        char[] firstDelimiter = { tmp.charAt(0) };
        if (validate(firstDelimiter, delims)) {
            if (length >= 2) {
                char[] lastDelimiter = { tmp.charAt(length - 1) };
                if (validate(lastDelimiter, delims)) {
                    tmp = tmp.substring(1, length - 1);
                    length = length - 2;
                }
            }
        }
    }

    /*
     * The starting index
     */
    int from = 0;

    /*
     * The test flag whether the URI is started from the path component.
     * True when the string does not begin with "//" and either has no colon
     * past index 0 or has a slash before its first colon.
     */
    boolean isStartedFromPath = false;
    int atColon = tmp.indexOf(':');
    int atSlash = tmp.indexOf('/');
    if (!tmp.startsWith("//") && (atColon <= 0 || (atSlash >= 0 && atSlash < atColon))) {
        isStartedFromPath = true;
    }

    /*
     * <p><blockquote><pre>
     *     @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // When starting from the path, ':' is not treated as a delimiter here.
    int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
    if (at == -1) {
        at = 0;
    }

    /*
     * Parse the scheme.
     * <p><blockquote><pre>
     *  scheme    =  $2 = http
     *              @
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (at > 0 && at < length && tmp.charAt(at) == ':') {
        char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
        if (validate(target, scheme)) {
            _scheme = target;
            // step past the ':' so the next stage starts after the scheme
            from = ++at;
        } else {
            // IA CHANGE:
            // do nothing; allow interpretation as URI with
            // later colon in other syntactical component
        }
    }

    /*
     * Parse the authority component.
     * <p><blockquote><pre>
     *  authority =  $4 = jakarta.apache.org
     *                  @@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Reset flags
    // NOTE(review): _is_opaque_part is assigned further down but is not part
    // of this reset -- confirm whether that is intentional.
    _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
    if (0 <= at && at < length && tmp.charAt(at) == '/') {
        // Set flag
        _is_hier_part = true;
        if (at + 2 < length && tmp.charAt(at + 1) == '/' && !isStartedFromPath) {
            // the temporary index to start the search from
            int next = indexFirstOf(tmp, "/?#", at + 2);
            if (next == -1) {
                // no further delimiter: the authority runs to the end
                next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length();
            }
            parseAuthority(tmp.substring(at + 2, next), escaped);
            from = at = next;
            // Set flag
            _is_net_path = true;
        }
        if (from == at) {
            // Set flag
            _is_abs_path = true;
        }
    }

    /*
     * Parse the path component.
     * <p><blockquote><pre>
     *  path      =  $5 = /ietf/uri/
     *                                @@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    if (from < length) {
        // rel_path = rel_segment [ abs_path ]
        int next = indexFirstOf(tmp, "?#", from);
        if (next == -1) {
            next = tmp.length();
        }
        if (!_is_abs_path) {
            // Unescaped input is checked with prevalidate against the
            // disallowed_* sets; escaped input is checked with validate
            // against the rel_path / opaque_part sets.
            if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                // Set flag
                _is_rel_path = true;
            } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
                    || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                // Set flag
                _is_opaque_part = true;
            } else {
                // the path component may be empty
                _path = null;
            }
        }
        String s = tmp.substring(from, next);
        if (escaped) {
            setRawPath(s.toCharArray());
        } else {
            setPath(s);
        }
        at = next;
    }

    // set the charset to do escape encoding
    String charset = getProtocolCharset();

    /*
     * Parse the query component.
     * <p><blockquote><pre>
     *  query     =  $7 = <undefined>
     *                                        @@@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // 'at + 1 < length' means a '?' that is the very last character does not
    // enter this branch, so no query is assigned for it.
    if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
        int next = tmp.indexOf('#', at + 1);
        if (next == -1) {
            next = tmp.length();
        }
        if (escaped) {
            _query = tmp.substring(at + 1, next).toCharArray();
            if (!validate(_query, query)) {
                throw new URIException("Invalid query");
            }
        } else {
            _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
        }
        at = next;
    }

    /*
     * Parse the fragment component.
     * <p><blockquote><pre>
     *  fragment  =  $9 = Related
     *                                                   @@@@@@@@
     *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     * </pre></blockquote><p>
     */
    // Unlike the query test, 'at + 1 <= length' lets a trailing '#' through,
    // which yields an empty (but non-null) fragment.
    if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
        if (at + 1 == length) { // empty fragment
            _fragment = "".toCharArray();
        } else {
            _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
        }
    }

    // set this URI.
    setURI();
}
From source file:org.archive.url.UsableURIFactory.java
/** * Check the generated UURI./*from w w w.j av a 2 s . c o m*/ * * At the least look at length of uuri string. We were seeing case * where before escaping, string was < MAX_URL_LENGTH but after was * >. Letting out a too-big message was causing us troubles later * down the processing chain. * @param uuri Created uuri to check. * @return The passed <code>uuri</code> so can easily inline this check. * @throws URIException */ protected UsableURI validityCheck(UsableURI uuri) throws URIException { if (uuri.getRawURI().length > UsableURI.MAX_URL_LENGTH) { throw new URIException("Created (escaped) uuri > " + UsableURI.MAX_URL_LENGTH + ": " + uuri.toString()); } return uuri; }
From source file:org.archive.url.UsableURIFactory.java
/**
 * Do heritrix fix-up on passed uri string.
 *
 * Does heritrix escaping; usually escaping done to make our behavior align
 * with IEs. This method codifies our experience pulling URIs from the
 * wilds. It does all the escaping we want; its output can always be
 * assumed to be 'escaped' (though perhaps to a laxer standard than the
 * vanilla HttpClient URI class or official specs might suggest).
 *
 * The result is reassembled from scheme, authority, path and query only;
 * the fragment group of the parse is not read.
 *
 * @param uri URI as string. Must not be null.
 * @param base May be null. Required when <code>uri</code> has no scheme.
 * @param charset Charset handed on to <code>fixupAuthority</code> and
 *        <code>ensureMinimalEscaping</code>.
 * @return A fixed up URI string.
 * @throws NullPointerException if <code>uri</code> is null
 * @throws URIException if the uri is empty with no base, is longer than
 *         <code>UsableURI.MAX_URL_LENGTH</code>, fails the RFC2396REGEX
 *         parse, is relative with no base, or a delegated check throws
 */
private String fixup(String uri, final URI base, final String charset) throws URIException {
    if (uri == null) {
        throw new NullPointerException();
    } else if (uri.length() == 0 && base == null) {
        throw new URIException("URI length is zero (and not relative).");
    }

    if (uri.length() > UsableURI.MAX_URL_LENGTH) {
        // We check length here and again later after all conversions.
        throw new URIException("URI length > " + UsableURI.MAX_URL_LENGTH + ": " + uri);
    }

    // Replace nbsp with normal spaces (so that they get stripped if at
    // ends, or encoded if in middle)
    if (uri.indexOf(NBSP) >= 0) {
        uri = TextUtils.replaceAll(NBSP, uri, SPACE);
    }

    // Get rid of any trailing spaces or new-lines.
    uri = uri.trim();

    // IE converts backslashes preceding the query string to slashes, rather
    // than to %5C. Since URIs that have backslashes usually work only with
    // IE, we will convert backslashes to slashes as well.
    int nextBackslash = uri.indexOf(BACKSLASH);
    if (nextBackslash >= 0) {
        int queryStart = uri.indexOf('?');
        StringBuilder tmp = new StringBuilder(uri);
        // Only backslashes before the first '?' (or all of them when there
        // is no '?') are rewritten. The search runs on the unmodified 'uri';
        // the edits go into 'tmp' at the same indices.
        while (nextBackslash >= 0 && (queryStart < 0 || nextBackslash < queryStart)) {
            tmp.setCharAt(nextBackslash, '/');
            nextBackslash = uri.indexOf(BACKSLASH, nextBackslash + 1);
        }
        uri = tmp.toString();
    }

    // Remove stray TAB/CR/LF
    uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);

    // Test for the case of more than two slashes after the http(s) scheme.
    // Replace with two slashes as mozilla does if found.
    // See [ 788219 ] URI Syntax Errors stop page parsing.
    // (Previously: Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);)
    Matcher matcher = TextUtils.getMatcher(HTTP_SCHEME_SLASHES.pattern(), uri);
    if (matcher.matches()) {
        uri = matcher.group(1) + matcher.group(2);
    }
    TextUtils.recycleMatcher(matcher);

    // For further processing, get uri elements. See the RFC2396REGEX
    // comment above for explanation of group indices used in the below.
    // (Previously: matcher = RFC2396REGEX.matcher(uri);)
    matcher = TextUtils.getMatcher(RFC2396REGEX.pattern(), uri);
    if (!matcher.matches()) {
        // NOTE(review): the matcher is not passed to recycleMatcher on this
        // throw path -- confirm whether that matters for TextUtils.
        throw new URIException("Failed parse of " + uri);
    }
    String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
    String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
    String uriAuthority = checkUriElement(matcher.group(5));
    String uriPath = checkUriElement(matcher.group(6));
    String uriQuery = checkUriElement(matcher.group(8));
    // UNUSED String uriFragment = checkUriElement(matcher.group(10));
    TextUtils.recycleMatcher(matcher);
    matcher = null;

    // Test if relative URI. If so, need a base to resolve against.
    if (uriScheme == null || uriScheme.length() <= 0) {
        if (base == null) {
            throw new URIException("Relative URI but no base: " + uri);
        }
    } else {
        checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme, uriSchemeSpecificPart);
    }

    // fixup authority portion: lowercase/IDN-punycode any domain;
    // remove stray trailing spaces
    uriAuthority = fixupAuthority(uriAuthority, charset);

    // Do some checks if absolute path.
    if (uriSchemeSpecificPart != null && uriSchemeSpecificPart.startsWith(SLASH)) {
        if (uriPath != null) {
            // Eliminate '..' if its first thing in the path. IE does this.
            uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath, SLASH);
        }
        // Ensure root URLs end with '/': browsers always send "/"
        // on the request-line, so we should consider "http://host"
        // to be "http://host/".
        if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
            uriPath = SLASH;
        }
    }

    if (uriAuthority != null) {
        // For http and https: validate the port, then strip a trailing
        // HTTP_PORT / HTTPS_PORT -- presumably the scheme's default port;
        // confirm against the constants' definitions.
        if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTP)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTP_PORT);
        } else if (uriScheme != null && uriScheme.length() > 0 && uriScheme.equals(HTTPS)) {
            uriAuthority = checkPort(uriAuthority);
            uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
        }
        // Strip any prefix dot or tail dots from the authority.
        uriAuthority = stripTail(uriAuthority, DOT);
        uriAuthority = stripPrefix(uriAuthority, DOT);
    } else {
        // no authority; may be relative. consider stripping scheme
        // to work-around org.apache.commons.httpclient.URI bug
        // ( http://issues.apache.org/jira/browse/HTTPCLIENT-587 )
        if (uriScheme != null && base != null && uriScheme.equals(base.getScheme())) {
            // uriScheme redundant and will only confound httpclient.URI
            uriScheme = null;
        }
    }

    // Ensure minimal escaping. Use of 'lax' URI and URLCodec
    // means minimal escaping isn't necessarily complete/consistent.
    // There is a chance such lax encoding will throw exceptions
    // later at inconvenient times.
    //
    // One reason for these bad escapings -- though not the only --
    // is that the page is using an encoding other than the ASCII or the
    // UTF-8 that is our default URI encoding. In this case the parent
    // class is burping on the passed URL encoding. If the page encoding
    // was passed into this factory, the encoding seems to be parsed
    // correctly (See the testEscapedEncoding unit test).
    //
    // This fixup may cause us to miss content. There is the charset case
    // noted above. TODO: Look out for cases where we fail other than for
    // the above given reason which will be fixed when we address
    // '[ 913687 ] Make extractors interrogate for charset'.
    uriPath = ensureMinimalEscaping(uriPath, charset);
    uriQuery = ensureMinimalEscaping(uriQuery, charset, LaxURLCodec.QUERY_SAFE);

    // Preallocate. The '1's and '2's in below are space for ':',
    // '//', etc. URI characters.
    MutableString s = new MutableString(((uriScheme != null) ? uriScheme.length() : 0) + 1 // ':'
            + ((uriAuthority != null) ? uriAuthority.length() : 0) + 2 // '//'
            + ((uriPath != null) ? uriPath.length() : 0) + 1 // '?'
            + ((uriQuery != null) ? uriQuery.length() : 0));
    // Reassemble scheme, authority, path and query in that order; each call
    // passes the component together with its separator string.
    appendNonNull(s, uriScheme, ":", true);
    appendNonNull(s, uriAuthority, "//", false);
    appendNonNull(s, uriPath, "", false);
    appendNonNull(s, uriQuery, "?", false);
    return s.toString();
}
From source file:org.archive.url.UsableURIFactory.java
/** * Check port on passed http authority. Make sure the size is not larger * than allowed: See the 'port' definition on this * page, http://www.kerio.com/manual/wrp/en/418.htm. * Also, we've seen port numbers of '0080' whose leading zeros confuse * the parent class. Strip the leading zeros. * * @param uriAuthority/*from w ww.j a va2 s . c om*/ * @return Null or an amended port number. * @throws URIException */ private String checkPort(String uriAuthority) throws URIException { // Matcher m = PORTREGEX.matcher(uriAuthority); Matcher m = TextUtils.getMatcher(PORTREGEX.pattern(), uriAuthority); if (m.matches()) { String no = m.group(2); if (no != null && no.length() > 0) { // First check if the port has leading zeros // as in '0080'. Strip them if it has and // then reconstitute the uriAuthority. Be careful // of cases where port is '0' or '000'. while (no.charAt(0) == '0' && no.length() > 1) { no = no.substring(1); } uriAuthority = m.group(1) + no; // Now makesure the number is legit. int portNo = 0; try { portNo = Integer.parseInt(no); } catch (NumberFormatException nfe) { // just catch and leave portNo at illegal 0 } if (portNo <= 0 || portNo > 65535) { throw new URIException("Port out of bounds: " + uriAuthority); } } } TextUtils.recycleMatcher(m); return uriAuthority; }
From source file:org.archive.wayback.util.url.KeyMakerUrlCanonicalizer.java
/**
 * Turn a URL string into its key by delegating to <code>keyMaker</code>.
 *
 * @param url URL string to convert
 * @return the key produced by <code>keyMaker.makeKey(url)</code>
 * @throws URIException carrying the message of any URISyntaxException
 *         raised by the key maker
 */
public String urlStringToKey(String url) throws URIException {
    final String key;
    try {
        key = keyMaker.makeKey(url);
    } catch (URISyntaxException syntaxProblem) {
        // Re-express the failure in the exception type callers expect.
        throw new URIException(syntaxProblem.getMessage());
    }
    return key;
}
From source file:org.parosproxy.paros.core.scanner.Analyser.java
private String getPathRegex(URI uri) throws URIException { URI newUri;/* w ww.java 2 s . c om*/ // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient try { newUri = (URI) uri.clone(); } catch (CloneNotSupportedException e) { throw new URIException(e.getMessage()); } String query = newUri.getQuery(); StringBuilder sb = new StringBuilder(100); // case should be sensitive //sb.append("(?i)"); newUri.setQuery(null); sb.append(newUri.toString().replaceAll("\\.", "\\.")); if (query != null) { String queryPattern = "(\\?" + query + ")?"; sb.append(queryPattern); } return sb.toString(); }
From source file:org.parosproxy.paros.extension.filter.FilterLogCookie.java
@Override public void onHttpRequestSend(HttpMessage msg) { HttpRequestHeader header = msg.getRequestHeader(); if (header != null) { String cookie = header.getHeader("Cookie"); synchronized (cookieList) { if (cookie != null && cookieList.indexOf(cookie) == -1) { try { // ZAP: catch CloneNotSupportedException as introduced with version 3.1 of HttpClient URI uri;/*from w w w . ja va 2 s . c o m*/ try { uri = (URI) header.getURI().clone(); } catch (CloneNotSupportedException e) { throw new URIException(e.getMessage()); } uri.setQuery(null); String sUri = uri.toString(); cookieList.add(cookie); getView().getOutputPanel().append(sUri + DELIM + cookie + "\n"); } catch (URIException e) { // ZAP: Print stack trace to Output tab getView().getOutputPanel().append(e); } } } } }