1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.StringWriter;
21  import java.io.UnsupportedEncodingException;
22  import java.util.Iterator;
23  import java.util.Map;
24  
25  import org.mortbay.log.Log;
26  
27  
28  /* ------------------------------------------------------------ */
29  /** Handles coding of MIME  "x-www-form-urlencoded".
30   * This class handles the encoding and decoding for either
31   * the query string of a URL or the _content of a POST HTTP request.
32   *
33   * <p><h4>Notes</h4>
34   * The hashtable either contains String single values, vectors
35   * of String or arrays of Strings.
36   *
37   * This class is only partially synchronised.  In particular, simple
38   * get operations are not protected from concurrent updates.
39   *
40   * @see java.net.URLEncoder
41   * @author Greg Wilkins (gregw)
42   */
43  public class UrlEncoded extends MultiMap
44  {
45  
46      /* ----------------------------------------------------------------- */
47      public UrlEncoded(UrlEncoded url)
48      {
49          super(url);
50      }
51      
52      /* ----------------------------------------------------------------- */
53      public UrlEncoded()
54      {
55          super(6);
56      }
57      
58      /* ----------------------------------------------------------------- */
59      public UrlEncoded(String s)
60      {
61          super(6);
62          decode(s,StringUtil.__UTF8);
63      }
64      
65      /* ----------------------------------------------------------------- */
66      public UrlEncoded(String s, String charset)
67      {
68          super(6);
69          decode(s,charset);
70      }
71      
72      /* ----------------------------------------------------------------- */
73      public void decode(String query)
74      {
75          decodeTo(query,this,StringUtil.__UTF8);
76      }
77      
78      /* ----------------------------------------------------------------- */
79      public void decode(String query,String charset)
80      {
81          decodeTo(query,this,charset);
82      }
83      
84      /* -------------------------------------------------------------- */
85      /** Encode Hashtable with % encoding.
86       */
87      public String encode()
88      {
89          return encode(StringUtil.__UTF8,false);
90      }
91      
92      /* -------------------------------------------------------------- */
93      /** Encode Hashtable with % encoding.
94       */
95      public String encode(String charset)
96      {
97          return encode(charset,false);
98      }
99      
100     /* -------------------------------------------------------------- */
101     /** Encode Hashtable with % encoding.
102      * @param equalsForNullValue if True, then an '=' is always used, even
103      * for parameters without a value. e.g. "blah?a=&b=&c=".
104      */
105     public synchronized String encode(String charset, boolean equalsForNullValue)
106     {
107         return encode(this,charset,equalsForNullValue);
108     }
109     
110     /* -------------------------------------------------------------- */
111     /** Encode Hashtable with % encoding.
112      * @param equalsForNullValue if True, then an '=' is always used, even
113      * for parameters without a value. e.g. "blah?a=&b=&c=".
114      */
115     public static String encode(MultiMap map, String charset, boolean equalsForNullValue)
116     {
117         if (charset==null)
118             charset=StringUtil.__UTF8;
119 
120         StringBuilder result = new StringBuilder(128);
121 
122         Iterator iter = map.entrySet().iterator();
123         while(iter.hasNext())
124         {
125             Map.Entry entry = (Map.Entry)iter.next();
126 
127             String key = entry.getKey().toString();
128             Object list = entry.getValue();
129             int s=LazyList.size(list);
130 
131             if (s==0)
132             {
133                 result.append(encodeString(key,charset));
134                 if(equalsForNullValue)
135                     result.append('=');
136             }
137             else
138             {
139                 for (int i=0;i<s;i++)
140                 {
141                     if (i>0)
142                         result.append('&');
143                     Object val=LazyList.get(list,i);
144                     result.append(encodeString(key,charset));
145 
146                     if (val!=null)
147                     {
148                         String str=val.toString();
149                         if (str.length()>0)
150                         {
151                             result.append('=');
152                             result.append(encodeString(str,charset));
153                         }
154                         else if (equalsForNullValue)
155                             result.append('=');
156                     }
157                     else if (equalsForNullValue)
158                         result.append('=');
159                 }
160             }
161             if (iter.hasNext())
162                 result.append('&');
163         }
164         return result.toString();
165     }
166 
167 
168 
169     /* -------------------------------------------------------------- */
170     /** Decoded parameters to Map.
171      * @param content the string containing the encoded parameters
172      */
173     public static void decodeTo(String content, MultiMap map, String charset)
174     {
175         if (charset==null)
176             charset=StringUtil.__UTF8;
177 
178         synchronized(map)
179         {
180             String key = null;
181             String value = null;
182             int mark=-1;
183             boolean encoded=false;
184             for (int i=0;i<content.length();i++)
185             {
186                 char c = content.charAt(i);
187                 switch (c)
188                 {
189                   case '&':
190                       int l=i-mark-1;
191                       value = l==0?"":
192                           (encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1,i));
193                       mark=i;
194                       encoded=false;
195                       if (key != null)
196                       {
197                           map.add(key,value);
198                       }
199                       else if (value!=null&&value.length()>0)
200                       {
201                           map.add(value,"");
202                       }
203                       key = null;
204                       value=null;
205                       break;
206                   case '=':
207                       if (key!=null)
208                           break;
209                       key = encoded?decodeString(content,mark+1,i-mark-1,charset):content.substring(mark+1,i);
210                       mark=i;
211                       encoded=false;
212                       break;
213                   case '+':
214                       encoded=true;
215                       break;
216                   case '%':
217                       encoded=true;
218                       break;
219                 }                
220             }
221             
222             if (key != null)
223             {
224                 int l=content.length()-mark-1;
225                 value = l==0?"":(encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1));
226                 map.add(key,value);
227             }
228             else if (mark<content.length())
229             {
230                 key = encoded
231                     ?decodeString(content,mark+1,content.length()-mark-1,charset)
232                     :content.substring(mark+1);
233                 map.add(key,"");
234             }
235         }
236     }
237 
238     /* -------------------------------------------------------------- */
239     /** Decoded parameters to Map.
240      * @param data the byte[] containing the encoded parameters
241      */
242     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map)
243     {
244         synchronized(map)
245         {
246             Utf8StringBuffer buffer = new Utf8StringBuffer();
247             String key = null;
248             String value = null;
249             
250             // TODO cache of parameter names ???
251             int end=offset+length;
252             for (int i=offset;i<end;i++)
253             {
254                 byte b=raw[i];
255                 switch ((char)(0xff&b))
256                 {
257                     case '&':
258                         value = buffer.length()==0?"":buffer.toString();
259                         buffer.reset();
260                         if (key != null)
261                         {
262                             map.add(key,value);
263                         }
264                         else if (value!=null&&value.length()>0)
265                         {
266                             map.add(value,"");
267                         }
268                         key = null;
269                         value=null;
270                         break;
271                         
272                     case '=':
273                         if (key!=null)
274                         {
275                             buffer.append(b);
276                             break;
277                         }
278                         key = buffer.toString();
279                         buffer.reset();
280                         break;
281                         
282                     case '+':
283                         buffer.append((byte)' ');
284                         break;
285                         
286                     case '%':
287                         if (i+2<end)
288                             buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
289                         break;
290                     default:
291                         buffer.append(b);
292                     break;
293                 }
294             }
295             
296             if (key != null)
297             {
298                 value = buffer.length()==0?"":buffer.toString();
299                 buffer.reset();
300                 map.add(key,value);
301             }
302             else if (buffer.length()>0)
303             {
304                 map.add(buffer.toString(),"");
305             }
306         }
307     }
308     
309     /* -------------------------------------------------------------- */
310     /** Decoded parameters to Map.
311      * @param in InputSteam to read
312      * @param map MultiMap to add parameters to
313      * @param maxLength maximum length of conent to read 0r -1 for no limit
314      */
315     public static void decodeUtf8To(InputStream in, MultiMap map, int maxLength)
316     throws IOException
317     {
318         synchronized(map)
319         {
320             Utf8StringBuffer buffer = new Utf8StringBuffer();
321             String key = null;
322             String value = null;
323             
324             int b;
325             
326             // TODO cache of parameter names ???
327             int totalLength=0;
328             while ((b=in.read())>=0)
329             {
330                 switch ((char) b)
331                 {
332                     case '&':
333                         value = buffer.length()==0?"":buffer.toString();
334                         buffer.reset();
335                         if (key != null)
336                         {
337                             map.add(key,value);
338                         }
339                         else if (value!=null&&value.length()>0)
340                         {
341                             map.add(value,"");
342                         }
343                         key = null;
344                         value=null;
345                         break;
346                         
347                     case '=':
348                         if (key!=null)
349                         {
350                             buffer.append((byte)b);
351                             break;
352                         }
353                         key = buffer.toString();
354                         buffer.reset();
355                         break;
356                         
357                     case '+':
358                         buffer.append((byte)' ');
359                         break;
360                         
361                     case '%':
362                         int dh=in.read();
363                         int dl=in.read();
364                         if (dh<0||dl<0)
365                             break;
366                         buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
367                         break;
368                     default:
369                         buffer.append((byte)b);
370                     break;
371                 }
372                 if (maxLength>=0 && (++totalLength > maxLength))
373                     throw new IllegalStateException("Form too large");
374             }
375             
376             if (key != null)
377             {
378                 value = buffer.length()==0?"":buffer.toString();
379                 buffer.reset();
380                 map.add(key,value);
381             }
382             else if (buffer.length()>0)
383             {
384                 map.add(buffer.toString(), "");
385             }
386         }
387     }
388     
389     /* -------------------------------------------------------------- */
390     public static void decodeUtf16To(InputStream in, MultiMap map, int maxLength) throws IOException
391     {
392         InputStreamReader input = new InputStreamReader(in,StringUtil.__UTF16);
393         StringBuffer buf = new StringBuffer();
394 
395         int c;
396         int length=0;
397         if (maxLength<0)
398             maxLength=Integer.MAX_VALUE;
399         while ((c=input.read())>0 && length++<maxLength)
400             buf.append((char)c);
401         decodeTo(buf.toString(),map,StringUtil.__UTF8);
402     }
403     
404     /* -------------------------------------------------------------- */
405     /** Decoded parameters to Map.
406      * @param in the stream containing the encoded parameters
407      */
408     public static void decodeTo(InputStream in, MultiMap map, String charset, int maxLength)
409     throws IOException
410     {
411         if (charset==null || StringUtil.__UTF8.equalsIgnoreCase(charset) || StringUtil.__ISO_8859_1.equalsIgnoreCase(charset))
412         {
413             decodeUtf8To(in,map,maxLength);
414             return;
415         }
416         
417         if (StringUtil.__UTF16.equalsIgnoreCase(charset)) // Should be all 2 byte encodings
418         {
419             decodeUtf16To(in,map,maxLength);
420             return;
421         }
422         
423         synchronized(map)
424         {
425             String key = null;
426             String value = null;
427             
428             int c;
429             int digit=0;
430             int digits=0;
431             
432             int l=-1;
433             int totalLength = 0;
434             ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
435             
436             int size=0;
437             
438             while ((c=in.read())>0)
439             {
440                 switch ((char) c)
441                 {
442                     case '&':
443                         size=output.size();
444                         value = size==0?"":output.toString(charset);
445                         output.setCount(0);
446                         if (key != null)
447                         {
448                             map.add(key,value);
449                         }
450                         else if (value!=null&&value.length()>0)
451                         {
452                             map.add(value,"");
453                         }
454                         key = null;
455                         value=null;
456                         break;
457                     case '=':
458                         if (key!=null)
459                         {
460                             output.write(c);
461                             break;
462                         }
463                         size=output.size();
464                         key = size==0?"":output.toString(charset);
465                         output.setCount(0);
466                         break;
467                     case '+':
468                         output.write(' ');
469                         break;
470                     case '%':
471                         digits=2;
472                         break;
473                     default:
474                         if (digits==2)
475                         {
476                             digit=TypeUtil.convertHexDigit((byte)c);
477                             digits=1;
478                         }
479                         else if (digits==1)
480                         {
481                             output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
482                             digits=0;
483                         }
484                         else
485                             output.write(c);
486                     break;
487                 }
488                 
489                 totalLength++;
490                 if (maxLength>=0 && totalLength > maxLength)
491                     throw new IllegalStateException("Form too large");
492             }
493 
494             size=output.size();
495             if (key != null)
496             {
497                 value = size==0?"":output.toString(charset);
498                 output.setCount(0);
499                 map.add(key,value);
500             }
501             else if (size>0)
502                 map.add(output.toString(charset),"");
503         }
504     }
505     
506     /* -------------------------------------------------------------- */
507     /** Decode String with % encoding.
508      * This method makes the assumption that the majority of calls
509      * will need no decoding.
510      */
511     public static String decodeString(String encoded,int offset,int length,String charset)
512     {
513         if (charset==null)
514             charset=StringUtil.__UTF8;
515         byte[] bytes=null;
516         int n=0;
517         
518         for (int i=0;i<length;i++)
519         {
520             char c = encoded.charAt(offset+i);
521             if (c<0||c>0xff)
522                 throw new IllegalArgumentException("Not encoded");
523             
524             if (c=='+')
525             {
526                 if (bytes==null)
527                 {
528                     bytes=new byte[length*2];
529                     encoded.getBytes(offset, offset+i, bytes, 0);
530                     n=i;
531                 }
532                 bytes[n++] = (byte) ' ';
533             }
534             else if (c=='%' && (i+2)<length)
535             {
536                 byte b;
537                 char cn = encoded.charAt(offset+i+1);
538                 if (cn>='a' && cn<='z')
539                     b=(byte)(10+cn-'a');
540                 else if (cn>='A' && cn<='Z')
541                     b=(byte)(10+cn-'A');
542                 else
543                     b=(byte)(cn-'0');
544                 cn = encoded.charAt(offset+i+2);
545                 if (cn>='a' && cn<='z')
546                     b=(byte)(b*16+10+cn-'a');
547                 else if (cn>='A' && cn<='Z')
548                     b=(byte)(b*16+10+cn-'A');
549                 else
550                     b=(byte)(b*16+cn-'0');
551 
552                 if (bytes==null)
553                 {
554                     bytes=new byte[length];
555                     encoded.getBytes(offset, offset+i, bytes, 0);
556                     n=i;
557                 }
558                 i+=2;
559                 bytes[n++]=b;
560             }
561             else if (n>0)
562                 bytes[n++] = (byte) c;
563         }
564 
565         if (bytes==null)
566         {
567             if (offset==0 && encoded.length()==length)
568                 return encoded;
569             return encoded.substring(offset,offset+length);
570         }
571         
572         try
573         {
574             return new String(bytes,0,n,charset);
575         }
576         catch (UnsupportedEncodingException e)
577         {
578             Log.warn(e.toString());
579             Log.debug(e);
580             return new String(bytes,0,n);
581         }
582         
583     }
584     
585     /* ------------------------------------------------------------ */
586     /** Perform URL encoding.
587      * Assumes 8859 charset
588      * @param string 
589      * @return encoded string.
590      */
591     public static String encodeString(String string)
592     {
593         return encodeString(string,StringUtil.__UTF8);
594     }
595     
596     /* ------------------------------------------------------------ */
597     /** Perform URL encoding.
598      * @param string 
599      * @return encoded string.
600      */
601     public static String encodeString(String string,String charset)
602     {
603         if (charset==null)
604             charset=StringUtil.__UTF8;
605         byte[] bytes=null;
606         try
607         {
608             bytes=string.getBytes(charset);
609         }
610         catch(UnsupportedEncodingException e)
611         {
612             // Log.warn(LogSupport.EXCEPTION,e);
613             bytes=string.getBytes();
614         }
615         
616         int len=bytes.length;
617         byte[] encoded= new byte[bytes.length*3];
618         int n=0;
619         boolean noEncode=true;
620         
621         for (int i=0;i<len;i++)
622         {
623             byte b = bytes[i];
624             
625             if (b==' ')
626             {
627                 noEncode=false;
628                 encoded[n++]=(byte)'+';
629             }
630             else if (b>='a' && b<='z' ||
631                      b>='A' && b<='Z' ||
632                      b>='0' && b<='9')
633             {
634                 encoded[n++]=b;
635             }
636             else
637             {
638                 noEncode=false;
639                 encoded[n++]=(byte)'%';
640                 byte nibble= (byte) ((b&0xf0)>>4);
641                 if (nibble>=10)
642                     encoded[n++]=(byte)('A'+nibble-10);
643                 else
644                     encoded[n++]=(byte)('0'+nibble);
645                 nibble= (byte) (b&0xf);
646                 if (nibble>=10)
647                     encoded[n++]=(byte)('A'+nibble-10);
648                 else
649                     encoded[n++]=(byte)('0'+nibble);
650             }
651         }
652 
653         if (noEncode)
654             return string;
655         
656         try
657         {    
658             return new String(encoded,0,n,charset);
659         }
660         catch(UnsupportedEncodingException e)
661         {
662             // Log.warn(LogSupport.EXCEPTION,e);
663             return new String(encoded,0,n);
664         }
665     }
666 
667 
668     /* ------------------------------------------------------------ */
669     /** 
670      */
671     public Object clone()
672     {
673         return new UrlEncoded(this);
674     }
675 }