1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.jetty;
16  
17  import java.io.UnsupportedEncodingException;
18  
19  import org.mortbay.util.MultiMap;
20  import org.mortbay.util.StringUtil;
21  import org.mortbay.util.TypeUtil;
22  import org.mortbay.util.URIUtil;
23  import org.mortbay.util.UrlEncoded;
24  
25  
26  /* ------------------------------------------------------------ */
27  /** Http URI.
28   * Parse a HTTP URI from a string or byte array.  Given a URI
29   * <code>http://user@host:port/path/info;param?query#fragment</code>
30   * this class will split it into the following undecoded optional elements:<ul>
31   * <li>{@link #getScheme()} - http:</li>
32   * <li>{@link #getAuthority()} - //name@host:port</li>
33   * <li>{@link #getHost()} - host</li>
34   * <li>{@link #getPort()} - port</li>
35   * <li>{@link #getPath()} - /path/info</li>
36   * <li>{@link #getParam()} - param</li>
37   * <li>{@link #getQuery()} - query</li>
38   * <li>{@link #getFragment()} - fragment</li>
39   * </ul>
40   * 
41   */
42  public class HttpURI
43  {
44      private static byte[] __empty={}; 
45      private final static int 
46      START=0,
47      AUTH_OR_PATH=1,
48      SCHEME_OR_PATH=2,
49      AUTH=4,
50      IPV6=5,
51      PORT=6,
52      PATH=7,
53      PARAM=8,
54      QUERY=9;
55      
56      boolean _partial=false;
57      byte[] _raw=__empty;
58      String _rawString;
59      int _scheme;
60      int _authority;
61      int _host;
62      int _port;
63      int _path;
64      int _param;
65      int _query;
66      int _fragment;
67      int _end;
68      
69      public HttpURI()
70      {
71          
72      } 
73      
74      /* ------------------------------------------------------------ */
75      /**
76       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
77       */
78      public HttpURI(boolean parsePartialAuth)
79      {
80          _partial=parsePartialAuth;
81      }
82      
83      public HttpURI(String raw)
84      {
85          _rawString=raw;
86          byte[] b = raw.getBytes();
87          parse(b,0,b.length);
88      }
89      
90      public HttpURI(byte[] raw,int offset, int length)
91      {
92          parse2(raw,offset,length);
93      }
94      
95      public void parse(String raw)
96      {
97          byte[] b = raw.getBytes();
98          parse2(b,0,b.length);
99          _rawString=raw;
100     }
101     
102     public void parse(byte[] raw,int offset, int length)
103     {
104         _rawString=null;
105         parse2(raw,offset,length);
106     }
107     
108     private void parse2(byte[] raw,int offset, int length)
109     {
110         _raw=raw;
111         int i=offset;
112         int e=offset+length;
113         int state=START;
114         int m=offset;
115         _end=offset+length;
116         _scheme=offset;
117         _authority=offset;
118         _host=offset;
119         _port=offset;
120         _path=offset;
121         _param=_end;
122         _query=_end;
123         _fragment=_end;
124         while (i<e)
125         {
126             char c=(char)(0xff&_raw[i]);
127             int s=i++;
128             
129             state: switch (state)
130             {
131                 case START:
132                 {
133                     m=s;
134                     if (c=='/')
135                     {
136                         state=AUTH_OR_PATH;
137                     }
138                     else if (Character.isLetterOrDigit(c))
139                     {
140                         state=SCHEME_OR_PATH;
141                     }
142                     else if (c==';')
143                     {
144                         _param=s;
145                         state=PARAM;
146                     }
147                     else if (c=='?')
148                     {
149                         _param=s;
150                         _query=s;
151                         state=QUERY;
152                     }
153                     else if (c=='#')
154                     {
155                         _param=s;
156                         _query=s;
157                         _fragment=s;
158                         break;
159                     }
160                     else
161                         throw new IllegalArgumentException(StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
162                     
163                     continue;
164                 }
165 
166                 case AUTH_OR_PATH:
167                 {
168                     if ((_partial||_scheme!=_authority) && c=='/')
169                     {
170                         _host=i;
171                         _port=_end;
172                         _path=_end;
173                         state=AUTH;
174                     }
175                     else if (c==';' || c=='?' || c=='#')
176                     {
177                         i--;
178                         state=PATH;
179                     }  
180                     else
181                     {
182                         _host=m;
183                         _port=m;
184                         state=PATH;
185                     }  
186                     continue;
187                 }
188                 
189                 case SCHEME_OR_PATH:
190                 {
191                     // short cut for http and https
192                     if (length>6 && c=='t')
193                     {
194                         if (_raw[offset+3]==':')
195                         {
196                             s=offset+3;
197                             i=offset+4;
198                             c=':';
199                         }
200                         else if (_raw[offset+4]==':')
201                         {
202                             s=offset+4;
203                             i=offset+5;
204                             c=':';
205                         }
206                         else if (_raw[offset+5]==':')
207                         {
208                             s=offset+5;
209                             i=offset+6;
210                             c=':';
211                         }
212                     }
213                     
214                     switch (c)
215                     {
216                         case ':':
217                         {
218                             m = i++;
219                             _authority = m;
220                             _path = m;
221                             c = (char)(0xff & _raw[i]);
222                             if (c == '/')
223                                 state = AUTH_OR_PATH;
224                             else
225                             {
226                                 _host = m;
227                                 _port = m;
228                                 state = PATH;
229                             }
230                             break;
231                         }
232                         
233                         case '/':
234                         {
235                             state = PATH;
236                             break;
237                         }
238                         
239                         case ';':
240                         {
241                             _param = s;
242                             state = PARAM;
243                             break;
244                         }
245                         
246                         case '?':
247                         {
248                             _param = s;
249                             _query = s;
250                             state = QUERY;
251                             break;
252                         }
253                         
254                         case '#':
255                         {
256                             _param = s;
257                             _query = s;
258                             _fragment = s;
259                             break;
260                         }
261                     }
262                     continue;
263                 }
264                 
265                 case AUTH:
266                 {
267                     switch (c)
268                     {
269 
270                         case '/':
271                         {
272                             m = s;
273                             _path = m;
274                             _port = _path;
275                             state = PATH;
276                             break;
277                         }
278                         case '@':
279                         {
280                             _host = i;
281                             break;
282                         }
283                         case ':':
284                         {
285                             _port = s;
286                             state = PORT;
287                             break;
288                         }
289                         case '[':
290                         {
291                             state = IPV6;
292                             break;
293                         }
294                     }
295                     continue;
296                 }
297 
298                 case IPV6:
299                 {
300                     switch (c)
301                     {
302                         case '/':
303                         {
304                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
305                         }
306                         case ']':
307                         {
308                             state = AUTH;
309                             break;
310                         }
311                     }
312 
313                     continue;
314                 }
315                 
316                 case PORT:
317                 {
318                     if (c=='/')
319                     {
320                         m=s;
321                         _path=m;
322                         if (_port<=_authority)
323                             _port=_path;
324                         state=PATH;
325                     }
326                     continue;
327                 }
328                 
329                 case PATH:
330                 {
331                     switch (c)
332                     {
333                         case ';':
334                         {
335                             _param = s;
336                             state = PARAM;
337                             break;
338                         }
339                         case '?':
340                         {
341                             _param = s;
342                             _query = s;
343                             state = QUERY;
344                             break;
345                         }
346                         case '#':
347                         {
348                             _param = s;
349                             _query = s;
350                             _fragment = s;
351                             break state;
352                         }
353                     }
354                     continue;
355                 }
356                 
357                 case PARAM:
358                 {
359                     switch (c)
360                     {
361                         case '?':
362                         {
363                             _query = s;
364                             state = QUERY;
365                             break;
366                         }
367                         case '#':
368                         {
369                             _query = s;
370                             _fragment = s;
371                             break state;
372                         }
373                     }
374                     continue;
375                 }
376                 
377                 case QUERY:
378                 {
379                     if (c=='#')
380                     {
381                         _fragment=s;
382                         break state;
383                     }
384                     continue;
385                 }
386             }
387         }
388     }
389     
390     
391     public String getScheme()
392     {
393         if (_scheme==_authority)
394             return null;
395         int l=_authority-_scheme;
396         if (l==5 && 
397             _raw[_scheme]=='h' && 
398             _raw[_scheme+1]=='t' && 
399             _raw[_scheme+2]=='t' && 
400             _raw[_scheme+3]=='p' )
401             return HttpSchemes.HTTP;
402         if (l==6 && 
403             _raw[_scheme]=='h' && 
404             _raw[_scheme+1]=='t' && 
405             _raw[_scheme+2]=='t' && 
406             _raw[_scheme+3]=='p' && 
407             _raw[_scheme+4]=='s' )
408             return HttpSchemes.HTTPS;
409         return StringUtil.toString(_raw,_scheme,_authority-_scheme-1,URIUtil.__CHARSET);
410     }
411     
412     public String getAuthority()
413     {
414         if (_authority==_path)
415             return null;
416         return StringUtil.toString(_raw,_authority,_path-_authority,URIUtil.__CHARSET);
417     }
418     
419     public String getHost()
420     {
421         if (_host==_port)
422             return null;
423         return StringUtil.toString(_raw,_host,_port-_host,URIUtil.__CHARSET);
424     }
425     
426     public int getPort()
427     {
428         if (_port==_path)
429             return -1;
430         return TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
431     }
432     
433     public String getPath()
434     {
435         if (_path==_param)
436             return null;
437         return StringUtil.toString(_raw,_path,_param-_path,URIUtil.__CHARSET);
438     }
439     
440     public String getDecodedPath()
441     {
442         if (_path==_param)
443             return null;
444         return URIUtil.decodePath(_raw,_path,_param-_path);
445     }
446     
447     public String getPathAndParam()
448     {
449         if (_path==_query)
450             return null;
451         return StringUtil.toString(_raw,_path,_query-_path,URIUtil.__CHARSET);
452     }
453     
454     public String getCompletePath()
455     {
456         if (_path==_end)
457             return null;
458         return StringUtil.toString(_raw,_path,_end-_path,URIUtil.__CHARSET);
459     }
460     
461     public String getParam()
462     {
463         if (_param==_query)
464             return null;
465         return StringUtil.toString(_raw,_param+1,_query-_param-1,URIUtil.__CHARSET);
466     }
467     
468     public String getQuery()
469     {
470         if (_query==_fragment)
471             return null;
472         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,URIUtil.__CHARSET);
473     }
474     
475     public String getQuery(String encoding)
476     {
477         if (_query==_fragment)
478             return null;
479         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding==null?URIUtil.__CHARSET:encoding);
480     }
481     
482     public String getFragment()
483     {
484         if (_fragment==_end)
485             return null;
486         return StringUtil.toString(_raw,_fragment+1,_end-_fragment-1,URIUtil.__CHARSET);
487     }
488 
489     public void decodeQueryTo(MultiMap parameters, String encoding) 
490         throws UnsupportedEncodingException
491     {
492         if (_query==_fragment)
493             return;
494        
495         if (encoding==null)
496             encoding=URIUtil.__CHARSET;
497         
498         if (StringUtil.isUTF8(encoding))
499             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
500         else
501             UrlEncoded.decodeTo(StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding),parameters,encoding);
502     }
503 
504     public void clear()
505     {
506         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
507         _raw=__empty;
508         _rawString="";
509     }
510     
511     public String toString()
512     {
513         if (_rawString==null)
514             _rawString= StringUtil.toString(_raw,_scheme,_end-_scheme,URIUtil.__CHARSET);
515         return _rawString;
516     }
517     
518 }