1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.util;
16  
/* ------------------------------------------------------------ */
/** UTF-8 StringBuilder.
 *
 * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the underlying builder.
 *
 * Malformed input is handled as follows:
 * <ul>
 * <li>a continuation byte with no preceding lead byte is replaced by <code>'?'</code>;</li>
 * <li>a multi-byte sequence interrupted by an ASCII byte is replaced by <code>'?'</code>,
 *     followed by the ASCII character itself;</li>
 * <li>a multi-byte sequence interrupted by another lead byte is replaced by <code>'?'</code>
 *     and an {@link IllegalArgumentException} is thrown;</li>
 * <li>a non-shortest (overlong) encoding or an invalid lead byte (0xFE, 0xFF) causes an
 *     {@link IllegalArgumentException};</li>
 * <li>a well-formed 4, 5 or 6 byte sequence that decodes to a value above U+10FFFF is
 *     replaced by <code>'?'</code>.</li>
 * </ul>
 * The decoder state is always cleared before an exception is thrown, so the instance
 * remains usable afterwards.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 */
public class Utf8StringBuilder
{
    /** Character appended in place of a malformed or unrepresentable byte sequence. */
    private static final char REPLACEMENT='?';

    /** The decoded characters. */
    StringBuilder _buffer;
    /** Number of continuation bytes still expected for the sequence in progress (0 if none). */
    int _more;
    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Smallest value that legitimately needs the length of the sequence in progress. */
    private int _min;

    /* ------------------------------------------------------------ */
    /** Create a builder with the default initial capacity of {@link StringBuilder}.
     */
    public Utf8StringBuilder()
    {
        _buffer=new StringBuilder();
    }

    /* ------------------------------------------------------------ */
    /** Create a builder with the given initial capacity.
     * @param capacity initial capacity (in chars) of the wrapped {@link StringBuilder}
     */
    public Utf8StringBuilder(int capacity)
    {
        _buffer=new StringBuilder(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Append a range of UTF-8 encoded bytes.
     * Bytes are decoded one at a time with {@link #append(byte)}, so a multi-byte sequence
     * may be split across several calls.
     * @param b the array holding the bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @throws IllegalArgumentException as for {@link #append(byte)}; bytes before the
     *         offending one have already been decoded
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }

    /* ------------------------------------------------------------ */
    /** Append a single UTF-8 encoded byte.
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if the byte is an invalid lead byte, a lead byte
     *         arriving inside an unfinished sequence, or completes a non-shortest form
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx: always a complete character on its own
            if (_more>0)
            {
                // the unfinished sequence before it is lost, the ASCII character is not
                _buffer.append(REPLACEMENT);
                clearState();
            }
            _buffer.append((char)b);
        }
        else if (_more==0)
            startSequence(b);
        else
            continueSequence(b);
    }

    /* ------------------------------------------------------------ */
    /** Handle a non-ASCII byte received while no sequence is in progress. */
    private void startSequence(byte b)
    {
        if ((b&0xc0)!=0xc0)
        {
            // 10xxxxxx: continuation byte without a lead byte
            _buffer.append(REPLACEMENT);
            return;
        }

        if ((b&0xe0)==0xc0)
        {
            // 110xxxxx
            _more=1;
            _bits=b&0x1f;
            _min=0x80;
            if (_bits<2)
            {
                // 0xC0 and 0xC1 can only encode values below 0x80
                clearState();
                throw new IllegalArgumentException("non-shortest UTF-8 form");
            }
        }
        else if ((b&0xf0)==0xe0)
        {
            // 1110xxxx
            _more=2;
            _bits=b&0x0f;
            _min=0x800;
        }
        else if ((b&0xf8)==0xf0)
        {
            // 11110xxx
            _more=3;
            _bits=b&0x07;
            _min=0x10000;
        }
        else if ((b&0xfc)==0xf8)
        {
            // 111110xx
            _more=4;
            _bits=b&0x03;
            _min=0x200000;
        }
        else if ((b&0xfe)==0xfc)
        {
            // 1111110x
            _more=5;
            _bits=b&0x01;
            _min=0x4000000;
        }
        else
        {
            // 0xFE or 0xFF
            throw new IllegalArgumentException("invalid UTF-8 lead byte 0x"+Integer.toHexString(b&0xff));
        }
    }

    /* ------------------------------------------------------------ */
    /** Handle a non-ASCII byte received while a sequence is in progress. */
    private void continueSequence(byte b)
    {
        if ((b&0xc0)==0xc0)
        {
            // 11??????: a new lead byte before the previous sequence finished
            _buffer.append(REPLACEMENT);
            clearState();
            throw new IllegalArgumentException("unexpected UTF-8 lead byte 0x"+Integer.toHexString(b&0xff));
        }

        // 10xxxxxx
        _bits=(_bits<<6)|(b&0x3f);
        if (--_more==0)
        {
            int codePoint=_bits;
            int min=_min;
            clearState();

            // The shortest-form rule can only be checked once all payload bits are known:
            // e.g. lead byte 0xE0 is valid for U+0800..U+0FFF but overlong below that.
            if (codePoint<min)
                throw new IllegalArgumentException("non-shortest UTF-8 form");

            if (codePoint>Character.MAX_CODE_POINT)
                _buffer.append(REPLACEMENT);
            else
                _buffer.appendCodePoint(codePoint); // emits a surrogate pair above U+FFFF
        }
    }

    /* ------------------------------------------------------------ */
    /** Forget any partially decoded sequence. */
    private void clearState()
    {
        _more=0;
        _bits=0;
        _min=0;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the number of chars decoded so far (bytes of an unfinished sequence are not counted)
     */
    public int length()
    {
        return _buffer.length();
    }

    /* ------------------------------------------------------------ */
    /** Discard all decoded characters and any partially decoded sequence.
     */
    public void reset()
    {
        _buffer.setLength(0);
        clearState();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the wrapped builder itself (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    public StringBuilder getStringBuilder()
    {
        if (_more!=0)
            throw new IllegalStateException("incomplete UTF-8 sequence");
        return _buffer;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the characters decoded so far
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    @Override
    public String toString()
    {
        if (_more!=0)
            throw new IllegalStateException("incomplete UTF-8 sequence");
        return _buffer.toString();
    }
}