1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.util;
16  
17  /* ------------------------------------------------------------ */
18  /** UTF-8 StringBuffer.
19   *
20   * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append 
21   * UTF-8 encoded bytes, that are converted into characters.
22   * 
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before 
 * a character is appended to the string buffer.
25   * 
26   * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
27   * The UTF-8 code was inspired by http://javolution.org
28   * 
29   * This class is not synchronised and should probably be called Utf8StringBuilder
30   */
public class Utf8StringBuffer 
{
    /** Destination for the decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the sequence in progress; 0 when idle. */
    int _more;
    /** Code point bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Smallest code point that legitimately requires the length of the sequence in progress. */
    private int _min;
    
    /** Creates a decoder backed by a StringBuffer of default capacity. */
    public Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }
    
    /** 
     * Creates a decoder backed by a StringBuffer of the given initial capacity.
     * @param capacity initial capacity passed to the underlying StringBuffer
     */
    public Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /**
     * Decodes <code>length</code> bytes of <code>b</code> starting at <code>offset</code>
     * by passing each one to {@link #append(byte)}.
     * @param b the bytes to decode
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @throws IllegalArgumentException as for {@link #append(byte)}
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }
    
    /**
     * Feeds one UTF-8 encoded byte to the decoder.
     * <p>
     * A character is appended to the buffer once its final byte has been seen.
     * Code points above U+FFFF are appended as a UTF-16 surrogate pair and decoded
     * values above U+10FFFF are replaced by '?'.  An ASCII byte arriving in the middle
     * of a sequence, or a continuation byte arriving with no sequence in progress, 
     * appends '?' in place of the malformed input.
     * 
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if <code>b</code> is not a legal lead byte 
     * (0xFE or 0xFF), if a lead byte arrives in the middle of a sequence, or if a 
     * completed sequence is not the shortest encoding of its code point.  The decoder 
     * state is cleared before the exception is thrown, so decoding may continue.
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx
            if (_more>0)
            {
                // the unfinished sequence is replaced by '?'; this byte is not appended
                _buffer.append('?');
                clearState();
            }
            else
                _buffer.append((char)(0x7f&b));
        }
        else if (_more==0)
        {
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx with no sequence in progress
                _buffer.append('?');
                clearState();
            }
            else if ((b & 0xe0) == 0xc0)
            {
                //110xxxxx
                _more=1;
                _bits=b&0x1f;
                _min=0x80;
            }
            else if ((b & 0xf0) == 0xe0)
            {
                //1110xxxx
                _more=2;
                _bits=b&0x0f;
                _min=0x800;
            }
            else if ((b & 0xf8) == 0xf0)
            {
                //11110xxx
                _more=3;
                _bits=b&0x07;
                _min=0x10000;
            }
            else if ((b & 0xfc) == 0xf8)
            {
                //111110xx
                _more=4;
                _bits=b&0x03;
                _min=0x200000;
            }
            else if ((b & 0xfe) == 0xfc) 
            {
                //1111110x
                _more=5;
                _bits=b&0x01;
                _min=0x4000000;
            }
            else
            {
                // 0xFE or 0xFF; no state has been modified
                throw new IllegalArgumentException();
            }
        }
        else if ((b&0xc0)==0xc0)
        {
            // 11?????? while continuation bytes were still expected
            _buffer.append('?');
            clearState();
            throw new IllegalArgumentException();
        }
        else
        {
            // 10xxxxxx
            _bits=(_bits<<6)|(b&0x3f);
            if (--_more==0)
                appendDecoded();
        }
    }
    
    /** 
     * @return the number of chars decoded into the buffer so far 
     */
    public int length()
    {
        return _buffer.length();
    }
    
    /** Empties the buffer and discards any partially decoded sequence. */
    public void reset()
    {
        _buffer.setLength(0);
        clearState();
    }
    
    /**
     * @return the underlying StringBuffer (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    public StringBuffer getStringBuffer()
    {
        if (_more!=0)
            throw new IllegalStateException();
        return _buffer;
    }
    
    /**
     * @return the decoded characters as a String
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    public String toString()
    {
        if (_more!=0)
            throw new IllegalStateException();
        return _buffer.toString();
    }
    
    /* Forgets any sequence in progress. */
    private void clearState()
    {
        _more=0;
        _bits=0;
        _min=0;
    }
    
    /* Validates the code point of a just completed sequence and appends it. */
    private void appendDecoded()
    {
        int codePoint=_bits;
        int min=_min;
        // clear first so that the decoder is usable after the exception below
        clearState();
        
        // The shortest form test can only be made on the complete value: lead bytes 
        // such as 0xE0 and 0xF0 carry no payload bits yet start valid sequences.
        if (codePoint<min)
            throw new IllegalArgumentException("non-shortest UTF-8 form");
        
        if (codePoint<0x10000)
            _buffer.append((char)codePoint);
        else if (codePoint<=0x10ffff)
        {
            // a supplementary code point needs a UTF-16 surrogate pair
            int offset=codePoint-0x10000;
            _buffer.append((char)(0xd800+(offset>>10)));
            _buffer.append((char)(0xdc00+(offset&0x3ff)));
        }
        else
        {
            // beyond the Unicode range (over long 4 byte, or any 5/6 byte form)
            _buffer.append('?');
        }
    }
}