1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.util;
16  
/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string buffer.
 *
 * Malformed input (a stray continuation byte, a truncated multi-byte sequence, the bytes
 * 0xFE/0xFF, or a sequence that decodes to a value above U+10FFFF) is replaced by a single
 * '?' and recorded so that {@link #isError()} returns true. Overlong encodings and encoded
 * surrogates are not rejected.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 * This class is not synchronized and should probably be called Utf8StringBuilder
 */
public class Utf8StringBuffer
{
    /** The decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the sequence in progress. */
    int _more;
    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Set once any malformed input has been replaced; cleared by {@link #reset()}. */
    boolean _errors;

    /* ------------------------------------------------------------ */
    /** Create a decoder backed by a StringBuffer of default capacity.
     */
    Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }

    /* ------------------------------------------------------------ */
    /** Create a decoder backed by a StringBuffer of the given initial capacity.
     * @param capacity initial capacity passed to the StringBuffer constructor
     */
    Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Append a range of UTF-8 encoded bytes.
     * Each byte is passed to {@link #append(byte)} in order, so a multi-byte
     * sequence may be split across several calls.
     * @param b the array holding the bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }

    /* ------------------------------------------------------------ */
    /** Append a single UTF-8 encoded byte.
     * A character is added to the buffer only once its final byte has been seen.
     * @param b the next byte of the UTF-8 stream
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx : 7 bit ASCII, including NUL
            if (_more>0)
                replaceMalformed(); // the pending sequence was truncated
            _buffer.append((char)b);
        }
        else if ((b&0xc0)==0x80)
        {
            // 10xxxxxx : continuation byte
            if (_more==0)
                replaceMalformed(); // no sequence is in progress
            else
            {
                _bits=(_bits<<6)|(b&0x3f);
                if (--_more==0)
                {
                    int codePoint=_bits;
                    _bits=0;
                    appendCodePoint(codePoint);
                }
            }
        }
        else
        {
            // 11xxxxxx : lead byte of a new sequence
            if (_more>0)
                replaceMalformed(); // the pending sequence was truncated

            if ((b & 0xe0) == 0xc0)
            {
                //110xxxxx
                _more=1;
                _bits=b&0x1f;
            }
            else if ((b & 0xf0) == 0xe0)
            {
                //1110xxxx
                _more=2;
                _bits=b&0x0f;
            }
            else if ((b & 0xf8) == 0xf0)
            {
                //11110xxx
                _more=3;
                _bits=b&0x07;
            }
            else if ((b & 0xfc) == 0xf8)
            {
                //111110xx
                _more=4;
                _bits=b&0x03;
            }
            else if ((b & 0xfe) == 0xfc)
            {
                //1111110x
                _more=5;
                _bits=b&0x01;
            }
            else
            {
                // 0xFE and 0xFF never appear in UTF-8
                replaceMalformed();
            }
        }
    }

    /* ------------------------------------------------------------ */
    /** Append a decoded code point, as one char or as a surrogate pair.
     * Values above U+10FFFF cannot be represented and are replaced by '?'.
     */
    private void appendCodePoint(int codePoint)
    {
        if (codePoint<0x10000)
            _buffer.append((char)codePoint);
        else if (codePoint<=0x10ffff)
        {
            int offset=codePoint-0x10000;
            _buffer.append((char)(0xd800|(offset>>10)));
            _buffer.append((char)(0xdc00|(offset&0x3ff)));
        }
        else
        {
            _buffer.append('?');
            _errors=true;
        }
    }

    /* ------------------------------------------------------------ */
    /** Emit the replacement character, discard any partial sequence and flag the error.
     */
    private void replaceMalformed()
    {
        _buffer.append('?');
        _more=0;
        _bits=0;
        _errors=true;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The number of chars decoded so far; bytes of an incomplete sequence are not counted.
     */
    public int length()
    {
        return _buffer.length();
    }

    /* ------------------------------------------------------------ */
    /** Discard the decoded characters, any partial sequence and the error flag.
     */
    public void reset()
    {
        _buffer.setLength(0);
        _more=0;
        _bits=0;
        _errors=false;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The wrapped StringBuffer itself (not a copy).
     */
    public StringBuffer getStringBuffer()
    {
        return _buffer;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return The characters decoded so far.
     */
    public String toString()
    {
        return _buffer.toString();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return True if there are non UTF-8 characters or incomplete UTF-8 characters in the buffer.
     */
    public boolean isError()
    {
        return _errors || _more>0;
    }
}