1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.jetty;
16  
17  import java.io.UnsupportedEncodingException;
18  
19  import org.mortbay.util.MultiMap;
20  import org.mortbay.util.StringUtil;
21  import org.mortbay.util.TypeUtil;
22  import org.mortbay.util.URIUtil;
23  import org.mortbay.util.UrlEncoded;
24  import org.mortbay.util.Utf8StringBuilder;
25  
26  
27  /* ------------------------------------------------------------ */
/** Http URI.
 * Parse an HTTP URI from a string or byte array.  Given a URI
 * <code>http://user@host:port/path/info;param?query#fragment</code>
 * this class will split it into the following undecoded optional elements:<ul>
 * <li>{@link #getScheme()} - http:</li>
 * <li>{@link #getAuthority()} - //user@host:port</li>
 * <li>{@link #getHost()} - host</li>
 * <li>{@link #getPort()} - port</li>
 * <li>{@link #getPath()} - /path/info</li>
 * <li>{@link #getParam()} - param</li>
 * <li>{@link #getQuery()} - query</li>
 * <li>{@link #getFragment()} - fragment</li>
 * </ul>
 * 
 */
43  public class HttpURI
44  {
45      private static byte[] __empty={}; 
46      private final static int 
47      START=0,
48      AUTH_OR_PATH=1,
49      SCHEME_OR_PATH=2,
50      AUTH=4,
51      IPV6=5,
52      PORT=6,
53      PATH=7,
54      PARAM=8,
55      QUERY=9,
56      ASTERISK=10;
57      
58      boolean _partial=false;
59      byte[] _raw=__empty;
60      String _rawString;
61      int _scheme;
62      int _authority;
63      int _host;
64      int _port;
65      int _path;
66      int _param;
67      int _query;
68      int _fragment;
69      int _end;
70      boolean _encoded=false;
71      
72      Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
73      
74      public HttpURI()
75      {
76          
77      } 
78      
79      /* ------------------------------------------------------------ */
80      /**
81       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
82       */
83      public HttpURI(boolean parsePartialAuth)
84      {
85          _partial=parsePartialAuth;
86      }
87      
88      public HttpURI(String raw)
89      {
90          _rawString=raw;
91          byte[] b = raw.getBytes();
92          parse(b,0,b.length);
93      }
94      
95      public HttpURI(byte[] raw,int offset, int length)
96      {
97          parse2(raw,offset,length);
98      }
99      
100     public void parse(String raw)
101     {
102         byte[] b = raw.getBytes();
103         parse2(b,0,b.length);
104         _rawString=raw;
105     }
106     
107     public void parse(byte[] raw,int offset, int length)
108     {
109         _rawString=null;
110         parse2(raw,offset,length);
111     }
112     
113     private void parse2(byte[] raw,int offset, int length)
114     {
115         _encoded=false;
116         _raw=raw;
117         int i=offset;
118         int e=offset+length;
119         int state=START;
120         int m=offset;
121         _end=offset+length;
122         _scheme=offset;
123         _authority=offset;
124         _host=offset;
125         _port=offset;
126         _path=offset;
127         _param=_end;
128         _query=_end;
129         _fragment=_end;
130         while (i<e)
131         {
132             char c=(char)(0xff&_raw[i]);
133             int s=i++;
134             
135             state: switch (state)
136             {
137                 case START:
138                 {
139                     m=s;
140                     switch(c)
141                     {
142                         case '/':
143                             state=AUTH_OR_PATH;
144                             break;
145                         case ';':
146                             _param=s;
147                             state=PARAM;
148                             break;
149                         case '?':
150                             _param=s;
151                             _query=s;
152                             state=QUERY;
153                             break;
154                         case '#':
155                             _param=s;
156                             _query=s;
157                             _fragment=s;
158                             break;
159                         case '*':
160                             _path=s;
161                             state=ASTERISK;
162                             break;
163                             
164                         default:
165                             if (Character.isLetterOrDigit(c))
166                                 state=SCHEME_OR_PATH;
167                             else
168                                 throw new IllegalArgumentException(StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
169                     }
170                     
171                     continue;
172                 }
173 
174                 case AUTH_OR_PATH:
175                 {
176                     if ((_partial||_scheme!=_authority) && c=='/')
177                     {
178                         _host=i;
179                         _port=_end;
180                         _path=_end;
181                         state=AUTH;
182                     }
183                     else if (c==';' || c=='?' || c=='#')
184                     {
185                         i--;
186                         state=PATH;
187                     }  
188                     else
189                     {
190                         _host=m;
191                         _port=m;
192                         state=PATH;
193                     }  
194                     continue;
195                 }
196                 
197                 case SCHEME_OR_PATH:
198                 {
199                     // short cut for http and https
200                     if (length>6 && c=='t')
201                     {
202                         if (_raw[offset+3]==':')
203                         {
204                             s=offset+3;
205                             i=offset+4;
206                             c=':';
207                         }
208                         else if (_raw[offset+4]==':')
209                         {
210                             s=offset+4;
211                             i=offset+5;
212                             c=':';
213                         }
214                         else if (_raw[offset+5]==':')
215                         {
216                             s=offset+5;
217                             i=offset+6;
218                             c=':';
219                         }
220                     }
221                     
222                     switch (c)
223                     {
224                         case ':':
225                         {
226                             m = i++;
227                             _authority = m;
228                             _path = m;
229                             c = (char)(0xff & _raw[i]);
230                             if (c == '/')
231                                 state = AUTH_OR_PATH;
232                             else
233                             {
234                                 _host = m;
235                                 _port = m;
236                                 state = PATH;
237                             }
238                             break;
239                         }
240                         
241                         case '/':
242                         {
243                             state = PATH;
244                             break;
245                         }
246                         
247                         case ';':
248                         {
249                             _param = s;
250                             state = PARAM;
251                             break;
252                         }
253                         
254                         case '?':
255                         {
256                             _param = s;
257                             _query = s;
258                             state = QUERY;
259                             break;
260                         }
261                         
262                         case '#':
263                         {
264                             _param = s;
265                             _query = s;
266                             _fragment = s;
267                             break;
268                         }
269                     }
270                     continue;
271                 }
272                 
273                 case AUTH:
274                 {
275                     switch (c)
276                     {
277 
278                         case '/':
279                         {
280                             m = s;
281                             _path = m;
282                             _port = _path;
283                             state = PATH;
284                             break;
285                         }
286                         case '@':
287                         {
288                             _host = i;
289                             break;
290                         }
291                         case ':':
292                         {
293                             _port = s;
294                             state = PORT;
295                             break;
296                         }
297                         case '[':
298                         {
299                             state = IPV6;
300                             break;
301                         }
302                     }
303                     continue;
304                 }
305 
306                 case IPV6:
307                 {
308                     switch (c)
309                     {
310                         case '/':
311                         {
312                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
313                         }
314                         case ']':
315                         {
316                             state = AUTH;
317                             break;
318                         }
319                     }
320 
321                     continue;
322                 }
323                 
324                 case PORT:
325                 {
326                     if (c=='/')
327                     {
328                         m=s;
329                         _path=m;
330                         if (_port<=_authority)
331                             _port=_path;
332                         state=PATH;
333                     }
334                     continue;
335                 }
336                 
337                 case PATH:
338                 {
339                     switch (c)
340                     {
341                         case ';':
342                         {
343                             _param = s;
344                             state = PARAM;
345                             break;
346                         }
347                         case '?':
348                         {
349                             _param = s;
350                             _query = s;
351                             state = QUERY;
352                             break;
353                         }
354                         case '#':
355                         {
356                             _param = s;
357                             _query = s;
358                             _fragment = s;
359                             break state;
360                         }
361                         case '%':
362                         {
363                             _encoded=true;
364                         }
365                     }
366                     continue;
367                 }
368                 
369                 case PARAM:
370                 {
371                     switch (c)
372                     {
373                         case '?':
374                         {
375                             _query = s;
376                             state = QUERY;
377                             break;
378                         }
379                         case '#':
380                         {
381                             _query = s;
382                             _fragment = s;
383                             break state;
384                         }
385                     }
386                     continue;
387                 }
388                 
389                 case QUERY:
390                 {
391                     if (c=='#')
392                     {
393                         _fragment=s;
394                         break state;
395                     }
396                     continue;
397                 }
398                 
399                 case ASTERISK:
400                 {
401                     throw new IllegalArgumentException("only '*'");
402                 }
403             }
404         }
405     }
406     
407     private String toUtf8String(int offset,int length)
408     {
409         _utf8b.reset();
410         _utf8b.append(_raw,offset,length);
411         return _utf8b.toString();
412     }
413     
414     public String getScheme()
415     {
416         if (_scheme==_authority)
417             return null;
418         int l=_authority-_scheme;
419         if (l==5 && 
420             _raw[_scheme]=='h' && 
421             _raw[_scheme+1]=='t' && 
422             _raw[_scheme+2]=='t' && 
423             _raw[_scheme+3]=='p' )
424             return HttpSchemes.HTTP;
425         if (l==6 && 
426             _raw[_scheme]=='h' && 
427             _raw[_scheme+1]=='t' && 
428             _raw[_scheme+2]=='t' && 
429             _raw[_scheme+3]=='p' && 
430             _raw[_scheme+4]=='s' )
431             return HttpSchemes.HTTPS;
432         
433         return toUtf8String(_scheme,_authority-_scheme-1);
434     }
435     
436     public String getAuthority()
437     {
438         if (_authority==_path)
439             return null;
440         return toUtf8String(_authority,_path-_authority);
441     }
442     
443     public String getHost()
444     {
445         if (_host==_port)
446             return null;
447         return toUtf8String(_host,_port-_host);
448     }
449     
450     public int getPort()
451     {
452         if (_port==_path)
453             return -1;
454         return TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
455     }
456     
457     public String getPath()
458     {
459         if (_path==_param)
460             return null;
461         return toUtf8String(_path,_param-_path);
462     }
463     
464     public String getDecodedPath()
465     {
466         if (_path==_param)
467             return null;
468 
469         int length = _param-_path;
470         byte[] bytes=null;
471         int n=0;
472 
473         for (int i=_path;i<_param;i++)
474         {
475             byte b = _raw[i];
476             
477             if (b=='%' && (i+2)<_param)
478             {
479                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
480                 i+=2;
481             }
482             else if (bytes==null)
483             {
484                 n++;
485                 continue;
486             }
487             
488             if (bytes==null)
489             {
490                 bytes=new byte[length];
491                 for (int j=0;j<n;j++)
492                     bytes[j]=_raw[_path+j];
493             }
494             
495             bytes[n++]=b;
496         }
497 
498         if (bytes==null)
499             return toUtf8String(_path,length);
500 
501         _utf8b.reset();
502         _utf8b.append(bytes,0,n);
503         return _utf8b.toString();
504     }
505     
506     public String getPathAndParam()
507     {
508         if (_path==_query)
509             return null;
510         return toUtf8String(_path,_query-_path);
511     }
512     
513     public String getCompletePath()
514     {
515         if (_path==_end)
516             return null;
517         return toUtf8String(_path,_end-_path);
518     }
519     
520     public String getParam()
521     {
522         if (_param==_query)
523             return null;
524         return toUtf8String(_param+1,_query-_param-1);
525     }
526     
527     public String getQuery()
528     {
529         if (_query==_fragment)
530             return null;
531         return toUtf8String(_query+1,_fragment-_query-1);
532     }
533     
534     public String getQuery(String encoding)
535     {
536         if (_query==_fragment)
537             return null;
538         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
539     }
540     
541     public boolean hasQuery()
542     {
543         return (_fragment>_query);
544     }
545     
546     public String getFragment()
547     {
548         if (_fragment==_end)
549             return null;
550         return toUtf8String(_fragment+1,_end-_fragment-1);
551     }
552 
553     public void decodeQueryTo(MultiMap parameters) 
554     {
555         if (_query==_fragment)
556             return;
557         _utf8b.reset();
558         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
559     }
560 
561     public void decodeQueryTo(MultiMap parameters, String encoding) 
562         throws UnsupportedEncodingException
563     {
564         if (_query==_fragment)
565             return;
566        
567         if (encoding==null || StringUtil.isUTF8(encoding))
568             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
569         else
570             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
571     }
572 
573     public void clear()
574     {
575         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
576         _raw=__empty;
577         _rawString="";
578         _encoded=false;
579     }
580     
581     public String toString()
582     {
583         if (_rawString==null)
584             _rawString=toUtf8String(_scheme,_end-_scheme);
585         return _rawString;
586     }
587     
588     public void writeTo(Utf8StringBuilder buf)
589     {
590         buf.append(_raw,_scheme,_end-_scheme);
591     }
592     
593 }