| ︙ | | |
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
+
-
+
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
|
** as one, so this function does not calculate the effective "display width".
*/
int strlen_utf8(const char *zString, int lengthBytes)
{
  /*
  ** Count the UTF-8 sequences in the first lengthBytes bytes of zString.
  **
  ** Every byte starts a new sequence, except that a lead byte absorbs the
  ** trail bytes (10vvvvvv) that directly follow it, up to the sequence
  ** length announced by the lead byte.  Incomplete or ill-formed sequences
  ** are therefore counted as one unit, and a lone trail byte, or a lead
  ** byte of the form 11111xxx, counts as a 1-byte sequence of its own.
  ** The scan never reads at or beyond zString[lengthBytes].
  **
  ** Returns the number of sequences; 0 if lengthBytes<=0.
  */
#if 0
  assert( lengthBytes>=0 );
#endif
  int i;                                  /* Counted bytes. */
  int lengthUTF8;                         /* Counted UTF-8 sequences. */
  for( i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++ ){
    char c = zString[i];
    if( (c&0xc0)==0xc0 ){                 /* Any UTF-8 lead byte 11xxxxxx */
      int cchUTF8=1;                      /* Code units consumed. */
      int maxUTF8=1;                      /* Expected sequence length. */
      if( (c&0xe0)==0xc0 )maxUTF8=2;      /* UTF-8 lead byte 110vvvvv */
      else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
      else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
      /* Swallow the trail bytes belonging to this lead byte.  The bounds
      ** check comes before the look-ahead so zString[i+1] is only read
      ** while it is still inside the counted range. */
      while( cchUTF8<maxUTF8 &&
             i<lengthBytes-1 &&
             (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
        cchUTF8++;
        i++;
      }
    }
  }
  return lengthUTF8;
}
/*
** This function is called when printing a logical comment line to calculate
|
| ︙ | | |
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
+
|
int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
int origBreak, /* [in] Non-zero to break before original comment. */
int *pLineCnt, /* [in/out] Pointer to the total line count. */
const char **pzLine /* [out] Pointer to the end of the logical line. */
){
int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
if( !zLine ) return;
if( lineChars<=0 ) return;
#if 0
assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
#endif
if( indent>sizeof(zBuf)-6 ) /* Limit initial indent to fit output buffer. */
|
| ︙ | | |
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
|
-
-
+
+
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
+
-
|
break;
}
charCnt++;
}else{
charCnt++;
}
assert( c!='\n' || charCnt==0 );
/*
** Avoid output of incomplete UTF-8 sequences, and also avoid line breaks
zBuf[iBuf++] = c;
/* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
** inside UTF-8 sequences. Incomplete, ill-formed and overlong sequences are
** kept together. The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are
** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively,
** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte
** sequences (as lone trail bytes).
*/
if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */
int cchUTF8=1; /* Code units consumed. */
int maxUTF8=1; /* Expected sequence length. */
cchUTF8=1; /* Code units consumed. */
maxUTF8=1; /* Expected sequence length. */
zBuf[iBuf++]=c;
if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
while( cchUTF8<maxUTF8 &&
(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[iBuf++] = zLine[index++];
}
if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
while( cchUTF8<maxUTF8 &&
(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[iBuf++] = zLine[index++];
}
maxChars--;
}else{
zBuf[iBuf++] = c;
maxChars -= useChars;
maxChars -= useChars;
}
if( maxChars<=0 ) break;
if( c=='\n' ) break;
}
if( charCnt>0 ){
zBuf[iBuf++] = '\n';
lineCnt++;
}
|
| ︙ | | |
360
361
362
363
364
365
366
367
368
369
370
371
372
373
|
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
|
+
|
){
int maxChars = width - indent;
int si, sk, i, k, kc;
int doIndent = 0;
char *zBuf;
char zBuffer[400];
int lineCnt = 0;
int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
if( width<0 ){
comment_set_maxchars(indent, &maxChars);
}
if( zText==0 ) zText = "(NULL)";
if( maxChars<=0 ){
maxChars = strlen(zText);
|
| ︙ | | |
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
|
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
|
-
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
-
+
|
}
if( zBuf!=zBuffer) fossil_free(zBuf);
return lineCnt;
}
for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
char c = zText[i];
kc++; /* Count complete UTF-8 sequences. */
/*
** Avoid line breaks inside UTF-8 sequences. Incomplete, ill-formed and
/* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
** overlong sequences are kept together. The invalid lead bytes 0xC0 to
** 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and
** 4-byte sequences, respectively, the other invalid lead bytes 0xF8 to
** 0xFF are treated as invalid 1-byte sequences (as lone trail bytes).
*/
if( (c&0xc0)==0xc0 && zText[i+1]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */
int cchUTF8=1; /* Code units consumed. */
int maxUTF8=1; /* Expected sequence length. */
if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
cchUTF8=1; /* Code units consumed. */
maxUTF8=1; /* Expected sequence length. */
if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
if( maxUTF8>1 ){
zBuf[k++] = c;
while( cchUTF8<maxUTF8 &&
(zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
(zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
cchUTF8++;
zBuf[k++] = zText[++i];
}
}
else if( fossil_isspace(c) ){
si = i;
sk = k;
|
| ︙ | | |