1 //========================================================================
2 //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3 //------------------------------------------------------------------------
4 //Licensed under the Apache License, Version 2.0 (the "License");
5 //you may not use this file except in compliance with the License.
6 //You may obtain a copy of the License at
7 //http://www.apache.org/licenses/LICENSE-2.0
8 //Unless required by applicable law or agreed to in writing, software
9 //distributed under the License is distributed on an "AS IS" BASIS,
10 //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 //See the License for the specific language governing permissions and
12 //limitations under the License.
13 //========================================================================
14
15 package org.mortbay.util;
16
/* ------------------------------------------------------------ */
/** UTF-8 StringBuilder.
 *
 * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the underlying builder. Code points above U+FFFF are
 * appended as a surrogate pair, so one decoded character may add two chars.
 *
 * Malformed input is handled in one of two ways: stray continuation bytes, sequences
 * cut short by an ASCII byte and values above U+10FFFF are replaced by <code>'?'</code>;
 * illegal lead bytes, a lead byte arriving in the middle of a sequence and
 * non-shortest (overlong) encodings raise {@link IllegalArgumentException}.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 * This class performs no synchronization.
 */
public class Utf8StringBuilder
{
    /** Destination for the decoded characters. */
    StringBuilder _buffer;
    /** Number of continuation bytes still expected for the sequence in progress (0 when idle). */
    int _more;
    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Smallest code point that legitimately needs the length of the sequence in progress. */
    private int _min;

    /* ------------------------------------------------------------ */
    /** Create a builder backed by a default-sized {@link StringBuilder}. */
    public Utf8StringBuilder()
    {
        _buffer=new StringBuilder();
    }

    /* ------------------------------------------------------------ */
    /** Create a builder backed by a {@link StringBuilder} of the given initial capacity.
     * @param capacity initial capacity, in chars, of the underlying builder
     */
    public Utf8StringBuilder(int capacity)
    {
        _buffer=new StringBuilder(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Decode and append a range of UTF-8 bytes.
     * Bytes are fed one at a time to {@link #append(byte)}; a multi-byte sequence may
     * be split across successive calls.
     * @param b the source array
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @throws IllegalArgumentException if a byte is rejected by {@link #append(byte)};
     *         bytes after the offending one are not consumed
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
        {
            append(b[i]);
        }
    }

    /* ------------------------------------------------------------ */
    /** Decode and append a single UTF-8 byte.
     * A character is appended to the underlying builder only once the last byte of
     * its sequence has been seen.
     * @param b the next byte of the UTF-8 stream
     * @throws IllegalArgumentException if the byte is 0xFE or 0xFF, if a lead byte arrives
     *         while continuation bytes are still expected (a <code>'?'</code> is appended
     *         first), or if the sequence is a non-shortest encoding. The decoder state is
     *         cleared before the exception is thrown.
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx
            if (_more>0)
            {
                // An ASCII byte cut a multi-byte sequence short: the partial sequence
                // and this byte are together replaced by a single '?'.
                _buffer.append('?');
                clearState();
            }
            else
            {
                _buffer.append((char)(0x7f&b));
            }
        }
        else if (_more==0)
        {
            startSequence(b);
        }
        else
        {
            continueSequence(b);
        }
    }

    /* ------------------------------------------------------------ */
    /** Handle a non-ASCII byte seen while no sequence is in progress. */
    private void startSequence(byte b)
    {
        if ((b&0xc0)!=0xc0)
        {
            // 10xxxxxx: continuation byte without a lead byte
            _buffer.append('?');
            clearState();
            return;
        }

        if ((b & 0xe0) == 0xc0)
        {
            //110xxxxx
            _more=1;
            _bits=b&0x1f;
            _min=0x80;
        }
        else if ((b & 0xf0) == 0xe0)
        {
            //1110xxxx
            _more=2;
            _bits=b&0x0f;
            _min=0x800;
        }
        else if ((b & 0xf8) == 0xf0)
        {
            //11110xxx
            _more=3;
            _bits=b&0x07;
            _min=0x10000;
        }
        else if ((b & 0xfc) == 0xf8)
        {
            //111110xx
            _more=4;
            _bits=b&0x03;
            _min=0x200000;
        }
        else if ((b & 0xfe) == 0xfc)
        {
            //1111110x
            _more=5;
            _bits=b&0x01;
            _min=0x4000000;
        }
        else
        {
            // 0xFE and 0xFF never occur in UTF-8
            throw new IllegalArgumentException("invalid UTF-8 lead byte");
        }

        // A two-byte lead of 0xC0 or 0xC1 can only encode values below 0x80, so it is
        // always overlong and can be rejected at once. Longer leads may legitimately
        // carry zero payload bits (0xE0, 0xF0, ...); those are checked on completion.
        if (_more==1 && _bits<2)
        {
            clearState();
            throw new IllegalArgumentException("non-shortest UTF-8 form");
        }
    }

    /* ------------------------------------------------------------ */
    /** Handle a non-ASCII byte seen while continuation bytes are expected. */
    private void continueSequence(byte b)
    {
        if ((b&0xc0)==0xc0)
        {
            // 11??????: a new lead byte arrived before the sequence completed
            _buffer.append('?');
            clearState();
            throw new IllegalArgumentException("unexpected UTF-8 lead byte");
        }

        // 10xxxxxx
        _bits=(_bits<<6)|(b&0x3f);
        if (--_more==0)
        {
            int codePoint=_bits;
            int min=_min;
            clearState();

            if (codePoint<min)
            {
                throw new IllegalArgumentException("non-shortest UTF-8 form");
            }

            if (codePoint>Character.MAX_CODE_POINT)
            {
                // Only reachable through 4-, 5- or 6-byte forms; not a Unicode character.
                _buffer.append('?');
            }
            else
            {
                // appendCodePoint emits a surrogate pair for values above U+FFFF
                _buffer.appendCodePoint(codePoint);
            }
        }
    }

    /* ------------------------------------------------------------ */
    /** Forget any partially decoded sequence. */
    private void clearState()
    {
        _more=0;
        _bits=0;
        _min=0;
    }

    /* ------------------------------------------------------------ */
    /** @return the number of chars decoded so far, not counting a sequence still in progress */
    public int length()
    {
        return _buffer.length();
    }

    /* ------------------------------------------------------------ */
    /** Discard all decoded characters and any partially decoded sequence. */
    public void reset()
    {
        _buffer.setLength(0);
        clearState();
    }

    /* ------------------------------------------------------------ */
    /** Get the underlying builder.
     * @return the {@link StringBuilder} holding the decoded characters (not a copy)
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    public StringBuilder getStringBuilder()
    {
        if (_more!=0)
        {
            throw new IllegalStateException();
        }
        return _buffer;
    }

    /* ------------------------------------------------------------ */
    /** Get the decoded characters as a String.
     * @return the decoded text
     * @throws IllegalStateException if a multi-byte sequence is incomplete
     */
    @Override
    public String toString()
    {
        if (_more!=0)
        {
            throw new IllegalStateException();
        }
        return _buffer.toString();
    }
}