1 //========================================================================
2 //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3 //------------------------------------------------------------------------
4 //Licensed under the Apache License, Version 2.0 (the "License");
5 //you may not use this file except in compliance with the License.
6 //You may obtain a copy of the License at
7 //http://www.apache.org/licenses/LICENSE-2.0
8 //Unless required by applicable law or agreed to in writing, software
9 //distributed under the License is distributed on an "AS IS" BASIS,
10 //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 //See the License for the specific language governing permissions and
12 //limitations under the License.
13 //========================================================================
14
15 package org.mortbay.util;
16
/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string buffer. Code points above U+FFFF are appended
 * as a UTF-16 surrogate pair (two chars).
 *
 * Malformed input is handled as follows:
 * <ul>
 * <li>a stray continuation byte, or a sequence cut short by an ASCII byte, appends '?'
 *     (in the latter case the ASCII byte itself is consumed);</li>
 * <li>a sequence cut short by another lead byte appends '?' and throws
 *     {@link IllegalArgumentException};</li>
 * <li>the bytes 0xFE/0xFF and any non-shortest (overlong) encoding throw
 *     {@link IllegalArgumentException};</li>
 * <li>a decoded value above U+10FFFF appends '?'.</li>
 * </ul>
 * Whenever an exception is thrown the decoder state is cleared first, so the instance
 * may continue to be used.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 * This class is not synchronised and should probably be called Utf8StringBuilder
 */
public class Utf8StringBuffer
{
    /** Decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the current sequence. */
    int _more;
    /** Payload bits accumulated so far for the current sequence. */
    int _bits;
    /** Smallest code point that legitimately needs the current sequence length. */
    private int _min;

    /** Creates a decoder backed by a default-sized buffer. */
    public Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }

    /**
     * Creates a decoder backed by a buffer of the given initial capacity.
     * @param capacity initial capacity passed to {@link StringBuffer#StringBuffer(int)}
     */
    public Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /**
     * Appends a range of UTF-8 encoded bytes.
     * @param b source array
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @throws IllegalArgumentException if the bytes are rejected by {@link #append(byte)}
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
        {
            append(b[i]);
        }
    }

    /**
     * Appends a single UTF-8 encoded byte, emitting a character once a sequence completes.
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if the byte is 0xFE/0xFF, if a lead byte arrives
     *         where a continuation byte was expected, or if the sequence is not the
     *         shortest form for its code point
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx : plain ASCII
            if (_more>0)
            {
                // The pending multi-byte sequence was truncated. A single replacement
                // is emitted and this ASCII byte is consumed (historical behaviour).
                _buffer.append('?');
                clearState();
            }
            else
            {
                _buffer.append((char)(0x7f&b));
            }
        }
        else if (_more==0)
        {
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx : continuation byte without a lead byte
                _buffer.append('?');
                clearState();
            }
            else if ((b&0xfe)==0xc0)
            {
                // 0xC0 / 0xC1 can only encode values below 0x80: always overlong.
                clearState();
                throw new IllegalArgumentException("non-shortest UTF-8 form");
            }
            else if ((b&0xe0)==0xc0)
            {
                // 110xxxxx
                startSequence(1,b&0x1f,0x80);
            }
            else if ((b&0xf0)==0xe0)
            {
                // 1110xxxx
                startSequence(2,b&0x0f,0x800);
            }
            else if ((b&0xf8)==0xf0)
            {
                // 11110xxx
                startSequence(3,b&0x07,0x10000);
            }
            else if ((b&0xfc)==0xf8)
            {
                // 111110xx (legacy 5 byte form)
                startSequence(4,b&0x03,0x200000);
            }
            else if ((b&0xfe)==0xfc)
            {
                // 1111110x (legacy 6 byte form)
                startSequence(5,b&0x01,0x4000000);
            }
            else
            {
                // 0xFE / 0xFF never appear in UTF-8
                throw new IllegalArgumentException();
            }
        }
        else if ((b&0xc0)==0xc0)
        {
            // 11?????? : a new lead byte interrupted the pending sequence
            _buffer.append('?');
            clearState();
            throw new IllegalArgumentException();
        }
        else
        {
            // 10xxxxxx : expected continuation byte
            _bits=(_bits<<6)|(b&0x3f);
            if (--_more==0)
            {
                int codePoint=_bits;
                int min=_min;
                clearState();
                // The overlong check needs the whole value: a zero payload in the lead
                // byte is legal (e.g. E0 A0 80 is U+0800), so it cannot be done earlier.
                if (codePoint<min)
                {
                    throw new IllegalArgumentException("non-shortest UTF-8 form");
                }
                appendCodePoint(codePoint);
            }
        }
    }

    /** Records the state for a freshly seen lead byte. */
    private void startSequence(int more,int bits,int min)
    {
        _more=more;
        _bits=bits;
        _min=min;
    }

    /** Forgets any partially decoded sequence. */
    private void clearState()
    {
        _more=0;
        _bits=0;
        _min=0;
    }

    /** Appends a code point as one char, a surrogate pair, or '?' if above U+10FFFF. */
    private void appendCodePoint(int codePoint)
    {
        if (codePoint<0x10000)
        {
            _buffer.append((char)codePoint);
        }
        else if (codePoint<=0x10ffff)
        {
            // Built by hand so that no Java 5 only API is required.
            int offset=codePoint-0x10000;
            _buffer.append((char)(0xd800|(offset>>10)));
            _buffer.append((char)(0xdc00|(offset&0x3ff)));
        }
        else
        {
            _buffer.append('?');
        }
    }

    /**
     * @return the number of chars decoded so far (a surrogate pair counts as two)
     */
    public int length()
    {
        return _buffer.length();
    }

    /** Empties the buffer and discards any partially decoded sequence. */
    public void reset()
    {
        _buffer.setLength(0);
        clearState();
    }

    /**
     * @return the underlying buffer (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    public StringBuffer getStringBuffer()
    {
        if (_more!=0)
        {
            throw new IllegalStateException();
        }
        return _buffer;
    }

    /**
     * @return the decoded characters as a String
     * @throws IllegalStateException if a multi-byte sequence is still incomplete
     */
    public String toString()
    {
        if (_more!=0)
        {
            throw new IllegalStateException();
        }
        return _buffer.toString();
    }
}