1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.StringWriter;
21  import java.io.UnsupportedEncodingException;
22  import java.util.Iterator;
23  import java.util.Map;
24  
25  import org.mortbay.log.Log;
26  
27  
28  /* ------------------------------------------------------------ */
29  /** Handles coding of MIME  "x-www-form-urlencoded".
30   * This class handles the encoding and decoding for either
31   * the query string of a URL or the _content of a POST HTTP request.
32   *
33   * <p><h4>Notes</h4>
34   * The hashtable either contains String single values, vectors
35   * of String or arrays of Strings.
36   *
37   * This class is only partially synchronised.  In particular, simple
38   * get operations are not protected from concurrent updates.
39   *
40   * @see java.net.URLEncoder
41   * @author Greg Wilkins (gregw)
42   */
43  public class UrlEncoded extends MultiMap
44  {
45  
46      /* ----------------------------------------------------------------- */
47      public UrlEncoded(UrlEncoded url)
48      {
49          super(url);
50      }
51      
52      /* ----------------------------------------------------------------- */
53      public UrlEncoded()
54      {
55          super(6);
56      }
57      
58      /* ----------------------------------------------------------------- */
59      public UrlEncoded(String s)
60      {
61          super(6);
62          decode(s,StringUtil.__UTF8);
63      }
64      
65      /* ----------------------------------------------------------------- */
66      public UrlEncoded(String s, String charset)
67      {
68          super(6);
69          decode(s,charset);
70      }
71      
72      /* ----------------------------------------------------------------- */
73      public void decode(String query)
74      {
75          decodeTo(query,this,StringUtil.__UTF8);
76      }
77      
78      /* ----------------------------------------------------------------- */
79      public void decode(String query,String charset)
80      {
81          decodeTo(query,this,charset);
82      }
83      
84      /* -------------------------------------------------------------- */
85      /** Encode Hashtable with % encoding.
86       */
87      public String encode()
88      {
89          return encode(StringUtil.__UTF8,false);
90      }
91      
92      /* -------------------------------------------------------------- */
93      /** Encode Hashtable with % encoding.
94       */
95      public String encode(String charset)
96      {
97          return encode(charset,false);
98      }
99      
100     /* -------------------------------------------------------------- */
101     /** Encode Hashtable with % encoding.
102      * @param equalsForNullValue if True, then an '=' is always used, even
103      * for parameters without a value. e.g. "blah?a=&b=&c=".
104      */
105     public synchronized String encode(String charset, boolean equalsForNullValue)
106     {
107         return encode(this,charset,equalsForNullValue);
108     }
109     
110     /* -------------------------------------------------------------- */
111     /** Encode Hashtable with % encoding.
112      * @param equalsForNullValue if True, then an '=' is always used, even
113      * for parameters without a value. e.g. "blah?a=&b=&c=".
114      */
115     public static String encode(MultiMap map, String charset, boolean equalsForNullValue)
116     {
117         if (charset==null)
118             charset=StringUtil.__UTF8;
119 
120         StringBuilder result = new StringBuilder(128);
121 
122         Iterator iter = map.entrySet().iterator();
123         while(iter.hasNext())
124         {
125             Map.Entry entry = (Map.Entry)iter.next();
126 
127             String key = entry.getKey().toString();
128             Object list = entry.getValue();
129             int s=LazyList.size(list);
130 
131             if (s==0)
132             {
133                 result.append(encodeString(key,charset));
134                 if(equalsForNullValue)
135                     result.append('=');
136             }
137             else
138             {
139                 for (int i=0;i<s;i++)
140                 {
141                     if (i>0)
142                         result.append('&');
143                     Object val=LazyList.get(list,i);
144                     result.append(encodeString(key,charset));
145 
146                     if (val!=null)
147                     {
148                         String str=val.toString();
149                         if (str.length()>0)
150                         {
151                             result.append('=');
152                             result.append(encodeString(str,charset));
153                         }
154                         else if (equalsForNullValue)
155                             result.append('=');
156                     }
157                     else if (equalsForNullValue)
158                         result.append('=');
159                 }
160             }
161             if (iter.hasNext())
162                 result.append('&');
163         }
164         return result.toString();
165     }
166 
167 
168 
169     /* -------------------------------------------------------------- */
170     /** Decoded parameters to Map.
171      * @param content the string containing the encoded parameters
172      */
173     public static void decodeTo(String content, MultiMap map, String charset)
174     {
175         if (charset==null)
176             charset=StringUtil.__UTF8;
177 
178         synchronized(map)
179         {
180             String key = null;
181             String value = null;
182             int mark=-1;
183             boolean encoded=false;
184             for (int i=0;i<content.length();i++)
185             {
186                 char c = content.charAt(i);
187                 switch (c)
188                 {
189                   case '&':
190                       int l=i-mark-1;
191                       value = l==0?"":
192                           (encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1,i));
193                       mark=i;
194                       encoded=false;
195                       if (key != null)
196                       {
197                           map.add(key,value);
198                       }
199                       else if (value!=null&&value.length()>0)
200                       {
201                           map.add(value,"");
202                       }
203                       key = null;
204                       value=null;
205                       break;
206                   case '=':
207                       if (key!=null)
208                           break;
209                       key = encoded?decodeString(content,mark+1,i-mark-1,charset):content.substring(mark+1,i);
210                       mark=i;
211                       encoded=false;
212                       break;
213                   case '+':
214                       encoded=true;
215                       break;
216                   case '%':
217                       encoded=true;
218                       break;
219                 }                
220             }
221             
222             if (key != null)
223             {
224                 int l=content.length()-mark-1;
225                 value = l==0?"":(encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1));
226                 map.add(key,value);
227             }
228             else if (mark<content.length())
229             {
230                 key = encoded
231                     ?decodeString(content,mark+1,content.length()-mark-1,charset)
232                     :content.substring(mark+1);
233                 map.add(key,"");
234             }
235         }
236     }
237 
238     /* -------------------------------------------------------------- */
239     /** Decoded parameters to Map.
240      * @param data the byte[] containing the encoded parameters
241      */
242     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map)
243     {
244         decodeUtf8To(raw,offset,length,map,new Utf8StringBuilder());
245     }
246 
247     /* -------------------------------------------------------------- */
248     /** Decoded parameters to Map.
249      * @param data the byte[] containing the encoded parameters
250      */
251     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map,Utf8StringBuilder buffer)
252     {
253         synchronized(map)
254         {
255             String key = null;
256             String value = null;
257             
258             // TODO cache of parameter names ???
259             int end=offset+length;
260             for (int i=offset;i<end;i++)
261             {
262                 byte b=raw[i];
263                 switch ((char)(0xff&b))
264                 {
265                     case '&':
266                         value = buffer.length()==0?"":buffer.toString();
267                         buffer.reset();
268                         if (key != null)
269                         {
270                             map.add(key,value);
271                         }
272                         else if (value!=null&&value.length()>0)
273                         {
274                             map.add(value,"");
275                         }
276                         key = null;
277                         value=null;
278                         break;
279                         
280                     case '=':
281                         if (key!=null)
282                         {
283                             buffer.append(b);
284                             break;
285                         }
286                         key = buffer.toString();
287                         buffer.reset();
288                         break;
289                         
290                     case '+':
291                         buffer.append((byte)' ');
292                         break;
293                         
294                     case '%':
295                         if (i+2<end)
296                             buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
297                         break;
298                     default:
299                         buffer.append(b);
300                     break;
301                 }
302             }
303             
304             if (key != null)
305             {
306                 value = buffer.length()==0?"":buffer.toString();
307                 buffer.reset();
308                 map.add(key,value);
309             }
310             else if (buffer.length()>0)
311             {
312                 map.add(buffer.toString(),"");
313             }
314         }
315     }
316 
317     /* -------------------------------------------------------------- */
318     /** Decoded parameters to Map.
319      * @param in InputSteam to read
320      * @param map MultiMap to add parameters to
321      * @param maxLength maximum length of content to read 0r -1 for no limit
322      */
323     public static void decode88591To(InputStream in, MultiMap map, int maxLength)
324     throws IOException
325     {
326         synchronized(map)
327         {
328             StringBuffer buffer = new StringBuffer();
329             String key = null;
330             String value = null;
331             
332             int b;
333 
334             // TODO cache of parameter names ???
335             int totalLength=0;
336             while ((b=in.read())>=0)
337             {
338                 switch ((char) b)
339                 {
340                     case '&':
341                         value = buffer.length()==0?"":buffer.toString();
342                         buffer.setLength(0);
343                         if (key != null)
344                         {
345                             map.add(key,value);
346                         }
347                         else if (value!=null&&value.length()>0)
348                         {
349                             map.add(value,"");
350                         }
351                         key = null;
352                         value=null;
353                         break;
354                         
355                     case '=':
356                         if (key!=null)
357                         {
358                             buffer.append((char)b);
359                             break;
360                         }
361                         key = buffer.toString();
362                         buffer.setLength(0);
363                         break;
364                         
365                     case '+':
366                         buffer.append((char)' ');
367                         break;
368                         
369                     case '%':
370                         int dh=in.read();
371                         int dl=in.read();
372                         if (dh<0||dl<0)
373                             break;
374                         buffer.append((char)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
375                         break;
376                     default:
377                         buffer.append((char)b);
378                     break;
379                 }
380                 if (maxLength>=0 && (++totalLength > maxLength))
381                     throw new IllegalStateException("Form too large");
382             }
383             
384             if (key != null)
385             {
386                 value = buffer.length()==0?"":buffer.toString();
387                 buffer.setLength(0);
388                 map.add(key,value);
389             }
390             else if (buffer.length()>0)
391             {
392                 map.add(buffer.toString(), "");
393             }
394         }
395     }
396     
397     /* -------------------------------------------------------------- */
398     /** Decoded parameters to Map.
399      * @param in InputSteam to read
400      * @param map MultiMap to add parameters to
401      * @param maxLength maximum length of conent to read 0r -1 for no limit
402      */
403     public static void decodeUtf8To(InputStream in, MultiMap map, int maxLength)
404     throws IOException
405     {
406         synchronized(map)
407         {
408             Utf8StringBuilder buffer = new Utf8StringBuilder();
409             String key = null;
410             String value = null;
411             
412             int b;
413             
414             // TODO cache of parameter names ???
415             int totalLength=0;
416             while ((b=in.read())>=0)
417             {
418                 switch ((char) b)
419                 {
420                     case '&':
421                         value = buffer.length()==0?"":buffer.toString();
422                         buffer.reset();
423                         if (key != null)
424                         {
425                             map.add(key,value);
426                         }
427                         else if (value!=null&&value.length()>0)
428                         {
429                             map.add(value,"");
430                         }
431                         key = null;
432                         value=null;
433                         break;
434                         
435                     case '=':
436                         if (key!=null)
437                         {
438                             buffer.append((byte)b);
439                             break;
440                         }
441                         key = buffer.toString();
442                         buffer.reset();
443                         break;
444                         
445                     case '+':
446                         buffer.append((byte)' ');
447                         break;
448                         
449                     case '%':
450                         int dh=in.read();
451                         int dl=in.read();
452                         if (dh<0||dl<0)
453                             break;
454                         buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
455                         break;
456                     default:
457                         buffer.append((byte)b);
458                     break;
459                 }
460                 if (maxLength>=0 && (++totalLength > maxLength))
461                     throw new IllegalStateException("Form too large");
462             }
463             
464             if (key != null)
465             {
466                 value = buffer.length()==0?"":buffer.toString();
467                 buffer.reset();
468                 map.add(key,value);
469             }
470             else if (buffer.length()>0)
471             {
472                 map.add(buffer.toString(), "");
473             }
474         }
475     }
476     
477     /* -------------------------------------------------------------- */
478     public static void decodeUtf16To(InputStream in, MultiMap map, int maxLength) throws IOException
479     {
480         InputStreamReader input = new InputStreamReader(in,StringUtil.__UTF16);
481         StringBuffer buf = new StringBuffer();
482 
483         int c;
484         int length=0;
485         if (maxLength<0)
486             maxLength=Integer.MAX_VALUE;
487         while ((c=input.read())>0 && length++<maxLength)
488             buf.append((char)c);
489         decodeTo(buf.toString(),map,StringUtil.__UTF8);
490     }
491     
492     /* -------------------------------------------------------------- */
493     /** Decoded parameters to Map.
494      * @param in the stream containing the encoded parameters
495      */
496     public static void decodeTo(InputStream in, MultiMap map, String charset, int maxLength)
497     throws IOException
498     {
499         if (charset==null || StringUtil.__ISO_8859_1.equals(charset))
500         {
501             decode88591To(in,map,maxLength);
502             return;
503         }
504 
505         if (StringUtil.__UTF8.equalsIgnoreCase(charset))
506         {
507             decodeUtf8To(in,map,maxLength);
508             return;
509         }
510 
511         if (StringUtil.__UTF16.equalsIgnoreCase(charset)) // Should be all 2 byte encodings
512         {
513             decodeUtf16To(in,map,maxLength);
514             return;
515         }
516         
517 
518         synchronized(map)
519         {
520             String key = null;
521             String value = null;
522             
523             int c;
524             int digit=0;
525             int digits=0;
526             
527             int totalLength = 0;
528             ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
529             
530             int size=0;
531             
532             while ((c=in.read())>0)
533             {
534                 switch ((char) c)
535                 {
536                     case '&':
537                         size=output.size();
538                         value = size==0?"":output.toString(charset);
539                         output.setCount(0);
540                         if (key != null)
541                         {
542                             map.add(key,value);
543                         }
544                         else if (value!=null&&value.length()>0)
545                         {
546                             map.add(value,"");
547                         }
548                         key = null;
549                         value=null;
550                         break;
551                     case '=':
552                         if (key!=null)
553                         {
554                             output.write(c);
555                             break;
556                         }
557                         size=output.size();
558                         key = size==0?"":output.toString(charset);
559                         output.setCount(0);
560                         break;
561                     case '+':
562                         output.write(' ');
563                         break;
564                     case '%':
565                         digits=2;
566                         break;
567                     default:
568                         if (digits==2)
569                         {
570                             digit=TypeUtil.convertHexDigit((byte)c);
571                             digits=1;
572                         }
573                         else if (digits==1)
574                         {
575                             output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
576                             digits=0;
577                         }
578                         else
579                             output.write(c);
580                     break;
581                 }
582                 
583                 totalLength++;
584                 if (maxLength>=0 && totalLength > maxLength)
585                     throw new IllegalStateException("Form too large");
586             }
587 
588             size=output.size();
589             if (key != null)
590             {
591                 value = size==0?"":output.toString(charset);
592                 output.setCount(0);
593                 map.add(key,value);
594             }
595             else if (size>0)
596                 map.add(output.toString(charset),"");
597         }
598     }
599     
600     /* -------------------------------------------------------------- */
601     /** Decode String with % encoding.
602      * This method makes the assumption that the majority of calls
603      * will need no decoding.
604      */
605     public static String decodeString(String encoded,int offset,int length,String charset)
606     {
607         if (charset==null)
608             charset=StringUtil.__UTF8;
609         byte[] bytes=null;
610         int n=0;
611         
612         for (int i=0;i<length;i++)
613         {
614             char c = encoded.charAt(offset+i);
615             if (c<0||c>0xff)
616                 throw new IllegalArgumentException("Not encoded");
617             
618             if (c=='+')
619             {
620                 if (bytes==null)
621                 {
622                     bytes=new byte[length*2];
623                     encoded.getBytes(offset, offset+i, bytes, 0);
624                     n=i;
625                 }
626                 bytes[n++] = (byte) ' ';
627             }
628             else if (c=='%' && (i+2)<length)
629             {
630                 byte b;
631                 char cn = encoded.charAt(offset+i+1);
632                 if (cn>='a' && cn<='z')
633                     b=(byte)(10+cn-'a');
634                 else if (cn>='A' && cn<='Z')
635                     b=(byte)(10+cn-'A');
636                 else
637                     b=(byte)(cn-'0');
638                 cn = encoded.charAt(offset+i+2);
639                 if (cn>='a' && cn<='z')
640                     b=(byte)(b*16+10+cn-'a');
641                 else if (cn>='A' && cn<='Z')
642                     b=(byte)(b*16+10+cn-'A');
643                 else
644                     b=(byte)(b*16+cn-'0');
645 
646                 if (bytes==null)
647                 {
648                     bytes=new byte[length];
649                     encoded.getBytes(offset, offset+i, bytes, 0);
650                     n=i;
651                 }
652                 i+=2;
653                 bytes[n++]=b;
654             }
655             else if (n>0)
656                 bytes[n++] = (byte) c;
657         }
658 
659         if (bytes==null)
660         {
661             if (offset==0 && encoded.length()==length)
662                 return encoded;
663             return encoded.substring(offset,offset+length);
664         }
665         
666         try
667         {
668             return new String(bytes,0,n,charset);
669         }
670         catch (UnsupportedEncodingException e)
671         {
672             Log.warn(e.toString());
673             Log.debug(e);
674             return new String(bytes,0,n);
675         }
676         
677     }
678     
679     /* ------------------------------------------------------------ */
680     /** Perform URL encoding.
681      * Assumes 8859 charset
682      * @param string 
683      * @return encoded string.
684      */
685     public static String encodeString(String string)
686     {
687         return encodeString(string,StringUtil.__UTF8);
688     }
689     
690     /* ------------------------------------------------------------ */
691     /** Perform URL encoding.
692      * @param string 
693      * @return encoded string.
694      */
695     public static String encodeString(String string,String charset)
696     {
697         if (charset==null)
698             charset=StringUtil.__UTF8;
699         byte[] bytes=null;
700         try
701         {
702             bytes=string.getBytes(charset);
703         }
704         catch(UnsupportedEncodingException e)
705         {
706             // Log.warn(LogSupport.EXCEPTION,e);
707             bytes=string.getBytes();
708         }
709         
710         int len=bytes.length;
711         byte[] encoded= new byte[bytes.length*3];
712         int n=0;
713         boolean noEncode=true;
714         
715         for (int i=0;i<len;i++)
716         {
717             byte b = bytes[i];
718             
719             if (b==' ')
720             {
721                 noEncode=false;
722                 encoded[n++]=(byte)'+';
723             }
724             else if (b>='a' && b<='z' ||
725                      b>='A' && b<='Z' ||
726                      b>='0' && b<='9')
727             {
728                 encoded[n++]=b;
729             }
730             else
731             {
732                 noEncode=false;
733                 encoded[n++]=(byte)'%';
734                 byte nibble= (byte) ((b&0xf0)>>4);
735                 if (nibble>=10)
736                     encoded[n++]=(byte)('A'+nibble-10);
737                 else
738                     encoded[n++]=(byte)('0'+nibble);
739                 nibble= (byte) (b&0xf);
740                 if (nibble>=10)
741                     encoded[n++]=(byte)('A'+nibble-10);
742                 else
743                     encoded[n++]=(byte)('0'+nibble);
744             }
745         }
746 
747         if (noEncode)
748             return string;
749         
750         try
751         {    
752             return new String(encoded,0,n,charset);
753         }
754         catch(UnsupportedEncodingException e)
755         {
756             // Log.warn(LogSupport.EXCEPTION,e);
757             return new String(encoded,0,n);
758         }
759     }
760 
761 
762     /* ------------------------------------------------------------ */
763     /** 
764      */
765     public Object clone()
766     {
767         return new UrlEncoded(this);
768     }
769 }