List of usage examples for org.apache.commons.httpclient.util URIUtil encode
public static String encode(String unescaped, BitSet allowed, String charset) throws URIException
From source file:davmail.caldav.CaldavConnection.java
static String encodePath(CaldavRequest request, String path) throws URIException { if (request.isIcal5()) { return URIUtil.encode(path, ical_allowed_abs_path, "UTF-8"); } else {/*from w w w .j av a 2 s. co m*/ return URIUtil.encodePath(path, "UTF-8"); } }
From source file:nl.basjes.parse.httpdlog.dissectors.HttpUriDissector.java
@Override public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure { final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname); String uriString = field.getValue().getString(); if (uriString == null || uriString.isEmpty()) { return; // Nothing to do here }/*from w w w . ja v a 2 s . co m*/ // First we cleanup the URI so we fail less often over 'garbage' URIs. // See: http://stackoverflow.com/questions/11038967/brackets-in-a-request-url-are-legal-but-not-in-a-uri-java try { uriString = URIUtil.encode(uriString, badUriChars, "UTF-8"); } catch (URIException e) { throw new DissectionFailure( "Failed to parse URI >>" + field.getValue().getString() + "<< because of : " + e.getMessage()); } // Before we hand it to the standard parser we hack it around a bit so we can parse // nasty edge cases that are illegal yet do occur in real clickstreams. // Also we force the query string to start with ?& so the returned query string starts with & // Which leads to more consistent output after parsing. int firstQuestionMark = uriString.indexOf('?'); int firstAmpersand = uriString.indexOf('&'); // Now we can have one of 3 situations: // 1) No query string // 2) Query string starts with a '?' // (and optionally followed by one or more '&' or '?' ) // 3) Query string starts with a '&'. This is invalid but does occur! // We may have ?x=x&y=y?z=z so we normalize it always // to: ?&x=x&y=y&z=z if (firstAmpersand != -1 || firstQuestionMark != -1) { uriString = uriString.replaceAll("\\?", "&"); uriString = uriString.replaceFirst("&", "?&"); } // We find that people muck up the URL by putting % signs in the URLs that are NOT escape sequences // So any % that is not followed by a two 'hex' letters is fixed uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1"); uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1"); boolean isUrl = true; URI uri; try { if (uriString.charAt(0) == '/') { uri = URI.create("dummy-protocol://dummy.host.name" + uriString); isUrl = false; // I.e. we do not return the values we just faked. } else { uri = URI.create(uriString); } } catch (IllegalArgumentException e) { throw new DissectionFailure( "Failed to parse URI >>" + field.getValue().getString() + "<< because of : " + e.getMessage()); } if (wantQuery || wantPath || wantRef) { if (wantQuery) { String query = uri.getRawQuery(); if (query == null) { query = ""; } parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", query); } if (wantPath) { parsable.addDissection(inputname, "HTTP.PATH", "path", uri.getPath()); } if (wantRef) { parsable.addDissection(inputname, "HTTP.REF", "ref", uri.getFragment()); } } if (isUrl) { if (wantProtocol) { parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", uri.getScheme()); } if (wantUserinfo) { parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", uri.getUserInfo()); } if (wantHost) { parsable.addDissection(inputname, "HTTP.HOST", "host", uri.getHost()); } if (wantPort) { if (uri.getPort() != -1) { parsable.addDissection(inputname, "HTTP.PORT", "port", uri.getPort()); } } } }