159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
c2 = c;
c = *++z;
if( c2>=0x80 ){
if( ((c2<0xc2) || (c2>=0xf4) || ((c&0xc0)!=0x80)) &&
(((c2!=0xf4) || (c>=0x90)) && ((c2!=0xc0) || (c!=0x80))) ){
return LOOK_INVALID; /* Invalid UTF-8 */
}
c = (c2 >= 0xe0) ? (c2<<1)+1 : ' ';
}
}
return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
}
/*
|
>
>
>
>
>
>
>
>
>
>
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
c2 = c;
c = *++z;
if( c2>=0x80 ){
if( ((c2<0xc2) || (c2>=0xf4) || ((c&0xc0)!=0x80)) &&
(((c2!=0xf4) || (c>=0x90)) && ((c2!=0xc0) || (c!=0x80))) ){
return LOOK_INVALID; /* Invalid UTF-8 */
}
/* The lead byte of the sequence is acceptable, but the remaining
** bytes still have to be checked. Replace the current byte with the
** lead byte of the next-shorter sequence, or with a plain space
** character if a two-byte sequence has just been validated.
*/
c = (c2 >= 0xe0) ? (c2<<1)+1 : ' ';
/* Edge case: a three-byte sequence that started with 0xe0 is mapped
** to 0xc1, which is not an accepted two-byte lead byte (it would
** begin an overlong encoding). Substitute 0xc2, the smallest valid
** two-byte lead byte, so the remaining byte is checked normally.
*/
if (c == 0xc1) c = 0xc2;
}
}
return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */
}
/*
|