Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
| Comment: | Quick test whether the `cli_wcwidth()` function [recently added to the SQLite shell](https://sqlite.org/src/vdiff?branch=variable-width-char) can be used by the comment formatter to take character widths into account when calculating word-break positions. TODOs: (0) Fix the "modern" (i.e. non-legacy) comment formatter being off by one if a fullwidth character only fits partially. (1) Add tests for the comment formatters with non-ASCII input. (2) Implement a modified `decodeUtf8()` function (which is static, anyway) that also accepts single-byte UTF-8 characters and may allow for some simplifications to the comment formatter algorithms. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | comment-formatter-wcwidth |
| Files: | files | file ages | folders |
| SHA3-256: | b2dbdc8afbff1c162400696cae7e8a80 |
| User & Date: | florian 2024-09-27 04:52:00.000 |
Context
|
2024-09-28
| ||
| 18:19 | Copy the wcwidth() implementation from SQLite over into comformat.c, so that comformat.c does not depend on SQLite. Fix the comformat routine so that it does not begin a new line with spaces. Closed-Leaf check-in: 7b581b48a0 user: drh tags: comment-formatter-wcwidth | |
|
2024-09-27
| ||
| 04:52 | Quick test whether the `cli_wcwidth()` function [recently added to the SQLite shell](https://sqlite.org/src/vdiff?branch=variable-width-char) can be used by the comment formatter to take character widths into account when calculating word-break positions. TODOs: (0) Fix the "modern" (i.e. non-legacy) comment formatter being off by one if a fullwidth character only fits partially. (1) Add tests for the comment formatters with non-ASCII input. (2) Implement a modified `decodeUtf8()` function (which is static, anyway) that also accepts single-byte UTF-8 characters and may allow for some simplifications to the comment formatter algorithms. check-in: b2dbdc8afb user: florian tags: comment-formatter-wcwidth | |
|
2024-09-26
| ||
| 19:49 | Merge the latest SQLite enhancements, and in particular the new ".www" dot-command available to "fossil sql". check-in: c20aa86727 user: drh tags: trunk | |
Changes
Changes to extsrc/shell.c.
| ︙ | ︙ | |||
1022 1023 1024 1025 1026 1027 1028 | /* ** Compute the value and length of a multi-byte UTF-8 character that ** begins at z[0]. Return the length. Write the Unicode value into *pU. ** ** This routine only works for *multi-byte* UTF-8 characters. */ | | | 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 |
/*
** Compute the value and length of a multi-byte UTF-8 character that
** begins at z[0]. Return the length. Write the Unicode value into *pU.
**
** This routine only works for *multi-byte* UTF-8 characters.
*/
int decodeUtf8(const unsigned char *z, int *pU){
if( (z[0] & 0xe0)==0xc0 && (z[1] & 0xc0)==0x80 ){
*pU = ((z[0] & 0x1f)<<6) | (z[1] & 0x3f);
return 2;
}
if( (z[0] & 0xf0)==0xe0 && (z[1] & 0xc0)==0x80 && (z[2] & 0xc0)==0x80 ){
*pU = ((z[0] & 0x0f)<<12) | ((z[1] & 0x3f)<<6) | (z[2] & 0x3f);
return 3;
|
| ︙ | ︙ |
Changes to src/comformat.c.
| ︙ | ︙ | |||
292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
while( cchUTF8<maxUTF8 &&
(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[iBuf++] = zLine[index++];
}
maxChars -= useChars;
if( maxChars<=0 ) break;
if( c=='\n' ) break;
}
if( charCnt>0 ){
zBuf[iBuf++] = '\n';
lineCnt++;
| > > > > > | 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
while( cchUTF8<maxUTF8 &&
(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[iBuf++] = zLine[index++];
}
if( cchUTF8>1 ){
int utf32;
decodeUtf8(&zLine[index-cchUTF8],&utf32);
useChars += cli_wcwidth(utf32) - 1;
}
maxChars -= useChars;
if( maxChars<=0 ) break;
if( c=='\n' ) break;
}
if( charCnt>0 ){
zBuf[iBuf++] = '\n';
lineCnt++;
|
| ︙ | ︙ | |||
378 379 380 381 382 383 384 385 386 387 388 389 390 391 |
zBuf[k++] = c;
while( cchUTF8<maxUTF8 &&
(zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[k++] = zText[++i];
}
}
else if( fossil_isspace(c) ){
si = i;
sk = k;
if( k==0 || zBuf[k-1]!=' ' ){
zBuf[k++] = ' ';
}
}else{
| > > > > > | 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 |
zBuf[k++] = c;
while( cchUTF8<maxUTF8 &&
(zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[k++] = zText[++i];
}
}
if( cchUTF8>1 ){
int utf32;
decodeUtf8(&zText[k-cchUTF8],&utf32);
kc += cli_wcwidth(utf32) - 1;
}
else if( fossil_isspace(c) ){
si = i;
sk = k;
if( k==0 || zBuf[k-1]!=' ' ){
zBuf[k++] = ' ';
}
}else{
|
| ︙ | ︙ |