Java tutorial
package com.alexa.stock.utils; /* * ==================================================================== * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. * */ import java.util.BitSet; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.http.util.EncodingUtils; /** * The URI escape and character encoding and decoding utility. * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather * than {@link org.apache.commons.httpclient.URI}. * * @author <a href="mailto:jericho@apache.org">Sung-Gu</a> * @version $Revision: 507321 $ $Date: 2002/03/14 15:14:01 */ public class URIUtils { // ----------------------------------------------------- Instance variables protected static final BitSet empty = new BitSet(1); // ---------------------------------------------------------- URI utilities /** * Get the basename of an URI. It's possibly an empty string. * * @param uri a string regarded an URI * @return the basename string; an empty string if the path ends with slash */ public static String getName(String uri) { if (uri == null || uri.length() == 0) { return uri; } String path = getPath(uri); int at = path.lastIndexOf("/"); int to = path.length(); return (at >= 0) ? path.substring(at + 1, to) : path; } /** * Get the query of an URI. * * @param uri a string regarded an URI * @return the query string; <code>null</code> if empty or undefined */ public static String getQuery(String uri) { if (uri == null || uri.length() == 0) { return null; } // consider of net_path int at = uri.indexOf("//"); int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0); // the authority part of URI ignored int to = uri.length(); // reuse the at and from variables to consider the query at = uri.indexOf("?", from); if (at >= 0) { from = at + 1; } else { return null; } // check the fragment if (uri.lastIndexOf("#") > from) { to = uri.lastIndexOf("#"); } // get the path and query. return (from < 0 || from == to) ? null : uri.substring(from, to); } /** * Get the path of an URI. * * @param uri a string regarded an URI * @return the path string */ public static String getPath(String uri) { if (uri == null) { return null; } // consider of net_path int at = uri.indexOf("//"); int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0); // the authority part of URI ignored int to = uri.length(); // check the query if (uri.indexOf('?', from) != -1) { to = uri.indexOf('?', from); } // check the fragment if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) { to = uri.lastIndexOf("#"); } // get only the path. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); } /** * Get the path and query of an URI. * * @param uri a string regarded an URI * @return the path and query string */ public static String getPathQuery(String uri) { if (uri == null) { return null; } // consider of net_path int at = uri.indexOf("//"); int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0); // the authority part of URI ignored int to = uri.length(); // Ignore the '?' mark so to ignore the query. // check the fragment if (uri.lastIndexOf("#") > from) { to = uri.lastIndexOf("#"); } // get the path and query. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); } /** * Get the path of an URI and its rest part. * * @param uri a string regarded an URI * @return the string from the path part */ public static String getFromPath(String uri) { if (uri == null) { return null; } // consider of net_path int at = uri.indexOf("//"); int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0); // get the path and its rest. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from); } // ----------------------------------------------------- Encoding utilities /** * Get the all escaped and encoded string with the default protocl charset. * It's the same function to use <code>encode(String unescaped, Bitset * empty, "UTF-8")</code>. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodeAll(String unescaped) throws URIException { return encodeAll(unescaped, "UTF-8"); } /** * Get the all escaped and encoded string with a given charset. * It's the same function to use <code>encode(String unescaped, Bitset * empty, String charset)</code>. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodeAll(String unescaped, String charset) throws URIException { return encode(unescaped, empty, charset); } /** * Escape and encode a string regarded as within the authority component of * an URI with the default protocol charset. * Within the authority component, the characters ";", ":", "@", "?", and * "/" are reserved. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodeWithinAuthority(String unescaped) throws URIException { return encodeWithinAuthority(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as within the authority component of * an URI with a given charset. * Within the authority component, the characters ";", ":", "@", "?", and * "/" are reserved. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodeWithinAuthority(String unescaped, String charset) throws URIException { return encode(unescaped, allowed_within_authority, charset); } /** * Escape and encode a string regarded as the path and query components of * an URI with the default protocol charset. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodePathQuery(String unescaped) throws URIException { return encodePathQuery(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as the path and query components of * an URI with a given charset. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodePathQuery(String unescaped, String charset) throws URIException { int at = unescaped.indexOf('?'); if (at < 0) { return encode(unescaped, allowed_abs_path, charset); } // else return encode(unescaped.substring(0, at), allowed_abs_path, charset) + '?' + encode(unescaped.substring(at + 1), allowed_query, charset); } /** * Escape and encode a string regarded as within the path component of an * URI with the default protocol charset. * The path may consist of a sequence of path segments separated by a * single slash "/" character. Within a path segment, the characters * "/", ";", "=", and "?" are reserved. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodeWithinPath(String unescaped) throws URIException { return encodeWithinPath(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as within the path component of an * URI with a given charset. * The path may consist of a sequence of path segments separated by a * single slash "/" character. Within a path segment, the characters * "/", ";", "=", and "?" are reserved. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodeWithinPath(String unescaped, String charset) throws URIException { return encode(unescaped, allowed_within_path, charset); } /** * Escape and encode a string regarded as the path component of an URI with * the default protocol charset. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodePath(String unescaped) throws URIException { return encodePath(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as the path component of an URI with * a given charset. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodePath(String unescaped, String charset) throws URIException { return encode(unescaped, allowed_abs_path, charset); } /** * Escape and encode a string regarded as within the query component of an * URI with the default protocol charset. * When a query comprise the name and value pairs, it is used in order * to encode each name and value string. The reserved special characters * within a query component are being included in encoding the query. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodeWithinQuery(String unescaped) throws URIException { return encodeWithinQuery(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as within the query component of an * URI with a given charset. * When a query comprise the name and value pairs, it is used in order * to encode each name and value string. The reserved special characters * within a query component are being included in encoding the query. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodeWithinQuery(String unescaped, String charset) throws URIException { return encode(unescaped, allowed_within_query, charset); } /** * Escape and encode a string regarded as the query component of an URI with * the default protocol charset. * When a query string is not misunderstood the reserved special characters * ("&", "=", "+", ",", and "$") within a query component, this method * is recommended to use in encoding the whole query. * * @param unescaped an unescaped string * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset * @see #encode */ public static String encodeQuery(String unescaped) throws URIException { return encodeQuery(unescaped, "UTF-8"); } /** * Escape and encode a string regarded as the query component of an URI with * a given charset. * When a query string is not misunderstood the reserved special characters * ("&", "=", "+", ",", and "$") within a query component, this method * is recommended to use in encoding the whole query. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URIException if the charset is not supported * * @see #encode */ public static String encodeQuery(String unescaped, String charset) throws URIException { return encode(unescaped, allowed_query, charset); } /** * Escape and encode a given string with allowed characters not to be * escaped and the default protocol charset. * * @param unescaped a string * @param allowed allowed characters not to be escaped * @return the escaped string * * @throws URIException if the default protocol charset is not supported * * @see URI#getDefaultProtocolCharset */ public static String encode(String unescaped, BitSet allowed) throws URIException { return encode(unescaped, allowed, "UTF-8"); } /** * Escape and encode a given string with allowed characters not to be * escaped and a given charset. * * @param unescaped a string * @param allowed allowed characters not to be escaped * @param charset the charset * @return the escaped string */ public static String encode(String unescaped, BitSet allowed, String charset) throws URIException { byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtils.getBytes(unescaped, charset)); return EncodingUtils.getAsciiString(rawdata); } /** * Unescape and decode a given string regarded as an escaped string with the * default protocol charset. * * @param escaped a string * @return the unescaped string * * @throws URIException if the string cannot be decoded (invalid) * * @see URI#getDefaultProtocolCharset */ public static String decode(String escaped) throws URIException { try { byte[] rawdata = URLCodec.decodeUrl(EncodingUtils.getAsciiBytes(escaped)); return EncodingUtils.getString(rawdata, "UTF-8"); } catch (DecoderException e) { throw new URIException(e.getMessage()); } } /** * The percent "%" character always has the reserved purpose of being the * escape indicator, it must be escaped as "%25" in order to be used as * data within a */ protected static final BitSet percent = new BitSet(256); // Static initializer for percent static { percent.set('%'); } /** * BitSet for digit. * <p><blockquote><pre> * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | * "8" | "9" * </pre></blockquote><p> */ protected static final BitSet digit = new BitSet(256); // Static initializer for digit static { for (int i = '0'; i <= '9'; i++) { digit.set(i); } } /** * BitSet for alpha. * <p><blockquote><pre> * alpha = lowalpha | upalpha * </pre></blockquote><p> */ protected static final BitSet alpha = new BitSet(256); // Static initializer for alpha static { for (int i = 'a'; i <= 'z'; i++) { alpha.set(i); } for (int i = 'A'; i <= 'Z'; i++) { alpha.set(i); } } /** * BitSet for alphanum (join of alpha & digit). * <p><blockquote><pre> * alphanum = alpha | digit * </pre></blockquote><p> */ protected static final BitSet alphanum = new BitSet(256); // Static initializer for alphanum static { alphanum.or(alpha); alphanum.or(digit); } /** * BitSet for hex. * <p><blockquote><pre> * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | * "a" | "b" | "c" | "d" | "e" | "f" * </pre></blockquote><p> */ protected static final BitSet hex = new BitSet(256); // Static initializer for hex static { hex.or(digit); for (int i = 'a'; i <= 'f'; i++) { hex.set(i); } for (int i = 'A'; i <= 'F'; i++) { hex.set(i); } } /** * BitSet for escaped. * <p><blockquote><pre> * escaped = "%" hex hex * </pre></blockquote><p> */ protected static final BitSet escaped = new BitSet(256); // Static initializer for escaped static { escaped.or(percent); escaped.or(hex); } /** * BitSet for mark. * <p><blockquote><pre> * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | * "(" | ")" * </pre></blockquote><p> */ protected static final BitSet mark = new BitSet(256); // Static initializer for mark static { mark.set('-'); mark.set('_'); mark.set('.'); mark.set('!'); mark.set('~'); mark.set('*'); mark.set('\''); mark.set('('); mark.set(')'); } /** * Data characters that are allowed in a URI but do not have a reserved * purpose are called unreserved. * <p><blockquote><pre> * unreserved = alphanum | mark * </pre></blockquote><p> */ protected static final BitSet unreserved = new BitSet(256); // Static initializer for unreserved static { unreserved.or(alphanum); unreserved.or(mark); } /** * BitSet for reserved. * <p><blockquote><pre> * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | * "$" | "," * </pre></blockquote><p> */ protected static final BitSet reserved = new BitSet(256); // Static initializer for reserved static { reserved.set(';'); reserved.set('/'); reserved.set('?'); reserved.set(':'); reserved.set('@'); reserved.set('&'); reserved.set('='); reserved.set('+'); reserved.set('$'); reserved.set(','); } /** * BitSet for uric. * <p><blockquote><pre> * uric = reserved | unreserved | escaped * </pre></blockquote><p> */ protected static final BitSet uric = new BitSet(256); // Static initializer for uric static { uric.or(reserved); uric.or(unreserved); uric.or(escaped); } /** * BitSet for fragment (alias for uric). * <p><blockquote><pre> * fragment = *uric * </pre></blockquote><p> */ protected static final BitSet fragment = uric; /** * BitSet for query (alias for uric). * <p><blockquote><pre> * query = *uric * </pre></blockquote><p> */ protected static final BitSet query = uric; /** * BitSet for pchar. * <p><blockquote><pre> * pchar = unreserved | escaped | * ":" | "@" | "&" | "=" | "+" | "$" | "," * </pre></blockquote><p> */ protected static final BitSet pchar = new BitSet(256); // Static initializer for pchar static { pchar.or(unreserved); pchar.or(escaped); pchar.set(':'); pchar.set('@'); pchar.set('&'); pchar.set('='); pchar.set('+'); pchar.set('$'); pchar.set(','); } /** * BitSet for param (alias for pchar). * <p><blockquote><pre> * param = *pchar * </pre></blockquote><p> */ protected static final BitSet param = pchar; /** * BitSet for segment. * <p><blockquote><pre> * segment = *pchar *( ";" param ) * </pre></blockquote><p> */ protected static final BitSet segment = new BitSet(256); // Static initializer for segment static { segment.or(pchar); segment.set(';'); segment.or(param); } /** * BitSet for path segments. * <p><blockquote><pre> * path_segments = segment *( "/" segment ) * </pre></blockquote><p> */ protected static final BitSet path_segments = new BitSet(256); // Static initializer for path_segments static { path_segments.set('/'); path_segments.or(segment); } /** * URI absolute path. * <p><blockquote><pre> * abs_path = "/" path_segments * </pre></blockquote><p> */ protected static final BitSet abs_path = new BitSet(256); // Static initializer for abs_path static { abs_path.set('/'); abs_path.or(path_segments); } /** * URI bitset for encoding typical non-slash characters. * <p><blockquote><pre> * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | * "&" | "=" | "+" | "$" | "," * </pre></blockquote><p> */ protected static final BitSet uric_no_slash = new BitSet(256); // Static initializer for uric_no_slash static { uric_no_slash.or(unreserved); uric_no_slash.or(escaped); uric_no_slash.set(';'); uric_no_slash.set('?'); uric_no_slash.set(';'); uric_no_slash.set('@'); uric_no_slash.set('&'); uric_no_slash.set('='); uric_no_slash.set('+'); uric_no_slash.set('$'); uric_no_slash.set(','); } /** * URI bitset that combines uric_no_slash and uric. * <p><blockquote><pre> * opaque_part = uric_no_slash *uric * </pre></blockquote><p> */ protected static final BitSet opaque_part = new BitSet(256); // Static initializer for opaque_part static { // it's generous. because first character must not include a slash opaque_part.or(uric_no_slash); opaque_part.or(uric); } /** * URI bitset that combines absolute path and opaque part. * <p><blockquote><pre> * path = [ abs_path | opaque_part ] * </pre></blockquote><p> */ protected static final BitSet path = new BitSet(256); // Static initializer for path static { path.or(abs_path); path.or(opaque_part); } /** * Port, a logical alias for digit. */ protected static final BitSet port = digit; /** * Bitset that combines digit and dot fo IPv$address. * <p><blockquote><pre> * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit * </pre></blockquote><p> */ protected static final BitSet IPv4address = new BitSet(256); // Static initializer for IPv4address static { IPv4address.or(digit); IPv4address.set('.'); } /** * RFC 2373. * <p><blockquote><pre> * IPv6address = hexpart [ ":" IPv4address ] * </pre></blockquote><p> */ protected static final BitSet IPv6address = new BitSet(256); // Static initializer for IPv6address reference static { IPv6address.or(hex); // hexpart IPv6address.set(':'); IPv6address.or(IPv4address); } /** * RFC 2732, 2373. * <p><blockquote><pre> * IPv6reference = "[" IPv6address "]" * </pre></blockquote><p> */ protected static final BitSet IPv6reference = new BitSet(256); // Static initializer for IPv6reference static { IPv6reference.set('['); IPv6reference.or(IPv6address); IPv6reference.set(']'); } /** * BitSet for toplabel. * <p><blockquote><pre> * toplabel = alpha | alpha *( alphanum | "-" ) alphanum * </pre></blockquote><p> */ protected static final BitSet toplabel = new BitSet(256); // Static initializer for toplabel static { toplabel.or(alphanum); toplabel.set('-'); } /** * BitSet for domainlabel. * <p><blockquote><pre> * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum * </pre></blockquote><p> */ protected static final BitSet domainlabel = toplabel; /** * BitSet for hostname. * <p><blockquote><pre> * hostname = *( domainlabel "." ) toplabel [ "." ] * </pre></blockquote><p> */ protected static final BitSet hostname = new BitSet(256); // Static initializer for hostname static { hostname.or(toplabel); // hostname.or(domainlabel); hostname.set('.'); } /** * BitSet for host. * <p><blockquote><pre> * host = hostname | IPv4address | IPv6reference * </pre></blockquote><p> */ protected static final BitSet host = new BitSet(256); // Static initializer for host static { host.or(hostname); // host.or(IPv4address); host.or(IPv6reference); // IPv4address } /** * BitSet for hostport. * <p><blockquote><pre> * hostport = host [ ":" port ] * </pre></blockquote><p> */ protected static final BitSet hostport = new BitSet(256); // Static initializer for hostport static { hostport.or(host); hostport.set(':'); hostport.or(port); } /** * Bitset for userinfo. * <p><blockquote><pre> * userinfo = *( unreserved | escaped | * ";" | ":" | "&" | "=" | "+" | "$" | "," ) * </pre></blockquote><p> */ protected static final BitSet userinfo = new BitSet(256); // Static initializer for userinfo static { userinfo.or(unreserved); userinfo.or(escaped); userinfo.set(';'); userinfo.set(':'); userinfo.set('&'); userinfo.set('='); userinfo.set('+'); userinfo.set('$'); userinfo.set(','); } /** * BitSet for within the userinfo component like user and password. */ public static final BitSet within_userinfo = new BitSet(256); // Static initializer for within_userinfo static { within_userinfo.or(userinfo); within_userinfo.clear(';'); // reserved within authority within_userinfo.clear(':'); within_userinfo.clear('@'); within_userinfo.clear('?'); within_userinfo.clear('/'); } /** * Bitset for server. * <p><blockquote><pre> * server = [ [ userinfo "@" ] hostport ] * </pre></blockquote><p> */ protected static final BitSet server = new BitSet(256); // Static initializer for server static { server.or(userinfo); server.set('@'); server.or(hostport); } /** * BitSet for reg_name. * <p><blockquote><pre> * reg_name = 1*( unreserved | escaped | "$" | "," | * ";" | ":" | "@" | "&" | "=" | "+" ) * </pre></blockquote><p> */ protected static final BitSet reg_name = new BitSet(256); // Static initializer for reg_name static { reg_name.or(unreserved); reg_name.or(escaped); reg_name.set('$'); reg_name.set(','); reg_name.set(';'); reg_name.set(':'); reg_name.set('@'); reg_name.set('&'); reg_name.set('='); reg_name.set('+'); } /** * BitSet for authority. * <p><blockquote><pre> * authority = server | reg_name * </pre></blockquote><p> */ protected static final BitSet authority = new BitSet(256); // Static initializer for authority static { authority.or(server); authority.or(reg_name); } /** * BitSet for scheme. * <p><blockquote><pre> * scheme = alpha *( alpha | digit | "+" | "-" | "." ) * </pre></blockquote><p> */ protected static final BitSet scheme = new BitSet(256); // Static initializer for scheme static { scheme.or(alpha); scheme.or(digit); scheme.set('+'); scheme.set('-'); scheme.set('.'); } /** * BitSet for rel_segment. * <p><blockquote><pre> * rel_segment = 1*( unreserved | escaped | * ";" | "@" | "&" | "=" | "+" | "$" | "," ) * </pre></blockquote><p> */ protected static final BitSet rel_segment = new BitSet(256); // Static initializer for rel_segment static { rel_segment.or(unreserved); rel_segment.or(escaped); rel_segment.set(';'); rel_segment.set('@'); rel_segment.set('&'); rel_segment.set('='); rel_segment.set('+'); rel_segment.set('$'); rel_segment.set(','); } /** * BitSet for rel_path. * <p><blockquote><pre> * rel_path = rel_segment [ abs_path ] * </pre></blockquote><p> */ protected static final BitSet rel_path = new BitSet(256); // Static initializer for rel_path static { rel_path.or(rel_segment); rel_path.or(abs_path); } /** * BitSet for net_path. * <p><blockquote><pre> * net_path = "//" authority [ abs_path ] * </pre></blockquote><p> */ protected static final BitSet net_path = new BitSet(256); // Static initializer for net_path static { net_path.set('/'); net_path.or(authority); net_path.or(abs_path); } /** * BitSet for hier_part. * <p><blockquote><pre> * hier_part = ( net_path | abs_path ) [ "?" query ] * </pre></blockquote><p> */ protected static final BitSet hier_part = new BitSet(256); // Static initializer for hier_part static { hier_part.or(net_path); hier_part.or(abs_path); // hier_part.set('?'); aleady included hier_part.or(query); } /** * BitSet for relative * <p><blockquote><pre> * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] * </pre></blockquote><p> */ protected static final BitSet relativeURI = new BitSet(256); // Static initializer for relativeURI static { relativeURI.or(net_path); relativeURI.or(abs_path); relativeURI.or(rel_path); // relativeURI.set('?'); aleady included relativeURI.or(query); } /** * BitSet for absoluteURI. * <p><blockquote><pre> * absoluteURI = scheme ":" ( hier_part | opaque_part ) * </pre></blockquote><p> */ protected static final BitSet absoluteURI = new BitSet(256); // Static initializer for absoluteURI static { absoluteURI.or(scheme); absoluteURI.set(':'); absoluteURI.or(hier_part); absoluteURI.or(opaque_part); } /** * BitSet for URI-reference. * <p><blockquote><pre> * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] * </pre></blockquote><p> */ protected static final BitSet URI_reference = new BitSet(256); // Static initializer for URI_reference static { URI_reference.or(absoluteURI); URI_reference.or(relativeURI); URI_reference.set('#'); URI_reference.or(fragment); } // ---------------------------- Characters disallowed within the URI syntax // Excluded US-ASCII Characters are like control, space, delims and unwise /** * BitSet for control. */ public static final BitSet control = new BitSet(256); // Static initializer for control static { for (int i = 0; i <= 0x1F; i++) { control.set(i); } control.set(0x7F); } /** * BitSet for space. */ public static final BitSet space = new BitSet(256); // Static initializer for space static { space.set(0x20); } /** * BitSet for delims. */ public static final BitSet delims = new BitSet(256); // Static initializer for delims static { delims.set('<'); delims.set('>'); delims.set('#'); delims.set('%'); delims.set('"'); } /** * BitSet for unwise. */ public static final BitSet unwise = new BitSet(256); // Static initializer for unwise static { unwise.set('{'); unwise.set('}'); unwise.set('|'); unwise.set('\\'); unwise.set('^'); unwise.set('['); unwise.set(']'); unwise.set('`'); } /** * Disallowed rel_path before escaping. */ public static final BitSet disallowed_rel_path = new BitSet(256); // Static initializer for disallowed_rel_path static { disallowed_rel_path.or(uric); disallowed_rel_path.andNot(rel_path); } /** * Disallowed opaque_part before escaping. */ public static final BitSet disallowed_opaque_part = new BitSet(256); // Static initializer for disallowed_opaque_part static { disallowed_opaque_part.or(uric); disallowed_opaque_part.andNot(opaque_part); } // ----------------------- Characters allowed within and for each component /** * Those characters that are allowed for the authority component. */ public static final BitSet allowed_authority = new BitSet(256); // Static initializer for allowed_authority static { allowed_authority.or(authority); allowed_authority.clear('%'); } /** * Those characters that are allowed for the opaque_part. */ public static final BitSet allowed_opaque_part = new BitSet(256); // Static initializer for allowed_opaque_part static { allowed_opaque_part.or(opaque_part); allowed_opaque_part.clear('%'); } /** * Those characters that are allowed for the reg_name. */ public static final BitSet allowed_reg_name = new BitSet(256); // Static initializer for allowed_reg_name static { allowed_reg_name.or(reg_name); // allowed_reg_name.andNot(percent); allowed_reg_name.clear('%'); } /** * Those characters that are allowed for the userinfo component. */ public static final BitSet allowed_userinfo = new BitSet(256); // Static initializer for allowed_userinfo static { allowed_userinfo.or(userinfo); // allowed_userinfo.andNot(percent); allowed_userinfo.clear('%'); } /** * Those characters that are allowed for within the userinfo component. */ public static final BitSet allowed_within_userinfo = new BitSet(256); // Static initializer for allowed_within_userinfo static { allowed_within_userinfo.or(within_userinfo); allowed_within_userinfo.clear('%'); } /** * Those characters that are allowed for the IPv6reference component. * The characters '[', ']' in IPv6reference should be excluded. */ public static final BitSet allowed_IPv6reference = new BitSet(256); // Static initializer for allowed_IPv6reference static { allowed_IPv6reference.or(IPv6reference); // allowed_IPv6reference.andNot(unwise); allowed_IPv6reference.clear('['); allowed_IPv6reference.clear(']'); } /** * Those characters that are allowed for the host component. * The characters '[', ']' in IPv6reference should be excluded. */ public static final BitSet allowed_host = new BitSet(256); // Static initializer for allowed_host static { allowed_host.or(hostname); allowed_host.or(allowed_IPv6reference); } /** * Those characters that are allowed for the authority component. */ public static final BitSet allowed_within_authority = new BitSet(256); // Static initializer for allowed_within_authority static { allowed_within_authority.or(server); allowed_within_authority.or(reg_name); allowed_within_authority.clear(';'); allowed_within_authority.clear(':'); allowed_within_authority.clear('@'); allowed_within_authority.clear('?'); allowed_within_authority.clear('/'); } /** * Those characters that are allowed for the abs_path. */ public static final BitSet allowed_abs_path = new BitSet(256); // Static initializer for allowed_abs_path static { allowed_abs_path.or(abs_path); // allowed_abs_path.set('/'); // aleady included allowed_abs_path.andNot(percent); allowed_abs_path.clear('+'); } /** * Those characters that are allowed for the rel_path. */ public static final BitSet allowed_rel_path = new BitSet(256); // Static initializer for allowed_rel_path static { allowed_rel_path.or(rel_path); allowed_rel_path.clear('%'); allowed_rel_path.clear('+'); } /** * Those characters that are allowed within the path. */ public static final BitSet allowed_within_path = new BitSet(256); // Static initializer for allowed_within_path static { allowed_within_path.or(abs_path); allowed_within_path.clear('/'); allowed_within_path.clear(';'); allowed_within_path.clear('='); allowed_within_path.clear('?'); } /** * Those characters that are allowed for the query component. */ public static final BitSet allowed_query = new BitSet(256); // Static initializer for allowed_query static { allowed_query.or(uric); allowed_query.clear('%'); } /** * Those characters that are allowed within the query component. */ public static final BitSet allowed_within_query = new BitSet(256); // Static initializer for allowed_within_query static { allowed_within_query.or(allowed_query); allowed_within_query.andNot(reserved); // excluded 'reserved' } /** * Those characters that are allowed for the fragment component. */ public static final BitSet allowed_fragment = new BitSet(256); // Static initializer for allowed_fragment static { allowed_fragment.or(uric); allowed_fragment.clear('%'); } public static class URIException extends Exception { /** * */ private static final long serialVersionUID = 1L; public URIException() { super(); // TODO Auto-generated constructor stub } public URIException(String message, Throwable cause) { super(message, cause); // TODO Auto-generated constructor stub } public URIException(String message) { super(message); // TODO Auto-generated constructor stub } public URIException(Throwable cause) { super(cause); // TODO Auto-generated constructor stub } } }