142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
|
** Java and Tcl use it. This function also accepts as valid
** the derivatives CESU-8 and WTF-8 (as described in the same
** Wikipedia article referenced previously).
*/
/* definitions for various UTF-8 sequence lengths */
/* Two-byte definition.  Element 0 is the sequence length; the pair that
 * follows is presumably the inclusive low/high bound of the one trailing
 * byte (only 0x80 is allowed here).
 * NOTE(review): the lead byte (apparently 0xC0) is not stored in this
 * array; it is expected to be implied by the lead-byte lookup table --
 * confirm against the code that consumes this definition. */
static const unsigned char us2a[] = {
    2, 0x80, 0x80
};
/* Two-byte definition.  Element 0 is the sequence length; the pair that
 * follows is presumably the inclusive low/high bound of the one trailing
 * byte (0x80..0xBF).
 * NOTE(review): the lead-byte range (apparently 0xC2..0xDF) is not stored
 * in this array; it is expected to be implied by the lead-byte lookup
 * table -- confirm against the code that consumes this definition. */
static const unsigned char us2b[] = {
    2, 0x80, 0xBF
};
/* Three-byte definition.  Element 0 is the sequence length; the pairs that
 * follow are presumably the inclusive low/high bounds of each trailing
 * byte in order (0xA0..0xBF, then 0x80..0xBF).
 * NOTE(review): the lead byte (apparently 0xE0) is not stored in this
 * array; it is expected to be implied by the lead-byte lookup table --
 * confirm against the code that consumes this definition. */
static const unsigned char us3a[] = {
    3, 0xA0, 0xBF, 0x80, 0xBF
};
/* Three-byte definition.  Element 0 is the sequence length; the pairs that
 * follow are presumably the inclusive low/high bounds of each trailing
 * byte in order (0x80..0xBF for both).
 * NOTE(review): the lead-byte range (apparently 0xE1..0xEF) is not stored
 * in this array; it is expected to be implied by the lead-byte lookup
 * table -- confirm against the code that consumes this definition. */
static const unsigned char us3b[] = {
    3, 0x80, 0xBF, 0x80, 0xBF
};
/* Four-byte definition.  Element 0 is the sequence length; the pairs that
 * follow are presumably the inclusive low/high bounds of each trailing
 * byte in order (0x90..0xBF, then 0x80..0xBF twice).
 * NOTE(review): the lead byte (apparently 0xF0) is not stored in this
 * array; it is expected to be implied by the lead-byte lookup table --
 * confirm against the code that consumes this definition. */
static const unsigned char us4a[] = {
    4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
};
/* Four-byte definition.  Element 0 is the sequence length; the pairs that
 * follow are presumably the inclusive low/high bounds of each trailing
 * byte in order (0x80..0xBF for all three).
 * NOTE(review): the lead-byte range (apparently 0xF1..0xF3) is not stored
 * in this array; it is expected to be implied by the lead-byte lookup
 * table -- confirm against the code that consumes this definition. */
static const unsigned char us4b[] = {
    4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
};
/* Four-byte definition.  Element 0 is the sequence length; the pairs that
 * follow are presumably the inclusive low/high bounds of each trailing
 * byte in order (0x80..0x8F, then 0x80..0xBF twice).
 * NOTE(review): the lead byte (apparently 0xF4) is not stored in this
 * array; it is expected to be implied by the lead-byte lookup table --
 * confirm against the code that consumes this definition. */
static const unsigned char us4c[] = {
    4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
};
/* a table used for quick lookup of the definition that goes with a
* particular lead byte */
static const unsigned char* lb_tab[] = {
static const unsigned char* const lb_tab[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|