Fossil

Diff
Login

Differences From Artifact [cf4a82f353]:

To Artifact [5d847583fb]:


155
156
157
158
159
160
161

162
163
164
165
166
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183
184
185
** as one, so this function does not calculate the effective "display width".
*/
int strlen_utf8(const char *zString, int lengthBytes)
{
  /*
  ** Walk the first lengthBytes bytes of zString and count one unit for:
  **   - a lead byte together with the trail bytes (10vvvvvv) that follow
  **     it, up to the number the lead byte announces, and
  **   - every other byte (ASCII, lone trail bytes, lead bytes 0xF8..0xFF).
  ** A sequence that is cut short still counts as one unit. No byte at or
  ** beyond offset lengthBytes is read; a length of zero or less yields 0.
  */
  const char *zNext;   /* Next byte that has not been consumed yet. */
  const char *zEnd;    /* One past the last byte that may be examined. */
  int nSeq = 0;        /* Number of sequences counted so far. */

  if( lengthBytes<=0 ) return 0;
  zNext = zString;
  zEnd = zString + lengthBytes;
  while( zNext<zEnd ){
    const char lead = *zNext++;
    int nTrail;        /* Trail bytes still allowed for this sequence. */

    if( (lead&0xe0)==0xc0 ){
      nTrail = 1;                             /* Lead byte 110vvvvv */
    }else if( (lead&0xf0)==0xe0 ){
      nTrail = 2;                             /* Lead byte 1110vvvv */
    }else if( (lead&0xf8)==0xf0 ){
      nTrail = 3;                             /* Lead byte 11110vvv */
    }else{
      nTrail = 0;                             /* Stands on its own. */
    }
    /* Swallow trail bytes, but never more than announced or available. */
    while( nTrail>0 && zNext<zEnd && (*zNext&0xc0)==0x80 ){
      zNext++;
      nTrail--;
    }
    nSeq++;
  }
  return nSeq;
}

/*
** This function is called when printing a logical comment line to calculate







>
|
<
|

<
<
|
|
|
|
|
<
|
>
|
|
|
<







155
156
157
158
159
160
161
162
163

164
165


166
167
168
169
170

171
172
173
174
175

176
177
178
179
180
181
182
** as one, so this function does not calculate the effective "display width".
*/
int strlen_utf8(const char *zString, int lengthBytes)
{
  /*
  ** Each pass of the outer loop consumes exactly one sequence: one byte
  ** plus, if that byte is a 2-, 3- or 4-byte lead, as many directly
  ** following trail bytes (10vvvvvv) as the lead announces and the input
  ** still holds. Bytes that are not such a lead count on their own, and
  ** truncated sequences count once. Reads stay below offset lengthBytes,
  ** so a length of zero or less gives 0.
  */
  int nSeq = 0;    /* Sequences counted so far. */
  int iByte = 0;   /* Offset of the next byte to look at. */

  while( iByte<lengthBytes ){
    const int c = zString[iByte++];
    int nMore = 0; /* Trail bytes this sequence may still take. */

    if( (c&0xe0)==0xc0 ){
      nMore = 1;                              /* Lead byte 110vvvvv */
    }else if( (c&0xf0)==0xe0 ){
      nMore = 2;                              /* Lead byte 1110vvvv */
    }else if( (c&0xf8)==0xf0 ){
      nMore = 3;                              /* Lead byte 11110vvv */
    }
    for( ; nMore>0; nMore-- ){
      if( iByte>=lengthBytes ) break;         /* Input exhausted. */
      if( (zString[iByte]&0xc0)!=0x80 ) break;  /* Not a trail byte. */
      iByte++;
    }
    nSeq++;
  }
  return nSeq;
}

/*
** This function is called when printing a logical comment line to calculate
221
222
223
224
225
226
227

228
229
230
231
232
233
234
  int wordBreak,         /* [in] Non-zero to try breaking on word boundaries. */
  int origBreak,         /* [in] Non-zero to break before original comment. */
  int *pLineCnt,         /* [in/out] Pointer to the total line count. */
  const char **pzLine    /* [out] Pointer to the end of the logical line. */
){
  int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */

  if( !zLine ) return;
  if( lineChars<=0 ) return;
#if 0
  assert( indent<sizeof(zBuf)-5 );       /* See following comments to explain */
  assert( origIndent<sizeof(zBuf)-5 );   /* these limits. */
#endif
  if( indent>sizeof(zBuf)-6 )   /* Limit initial indent to fit output buffer. */







>







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
  int wordBreak,         /* [in] Non-zero to try breaking on word boundaries. */
  int origBreak,         /* [in] Non-zero to break before original comment. */
  int *pLineCnt,         /* [in/out] Pointer to the total line count. */
  const char **pzLine    /* [out] Pointer to the end of the logical line. */
){
  int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
  char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
  int cchUTF8, maxUTF8;       /* Helper variables to count UTF-8 sequences. */
  if( !zLine ) return;
  if( lineChars<=0 ) return;
#if 0
  assert( indent<sizeof(zBuf)-5 );       /* See following comments to explain */
  assert( origIndent<sizeof(zBuf)-5 );   /* these limits. */
#endif
  if( indent>sizeof(zBuf)-6 )   /* Limit initial indent to fit output buffer. */
292
293
294
295
296
297
298
299

300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
        break;
      }
      charCnt++;
    }else{
      charCnt++;
    }
    assert( c!='\n' || charCnt==0 );
    /*

    ** Avoid output of incomplete UTF-8 sequences, and also avoid line breaks
    ** inside UTF-8 sequences. Incomplete, ill-formed and overlong sequences are
    ** kept together. The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are
    ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively,
    ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte
    ** sequences (as lone trail bytes).
    */
    if( (c&0xc0)==0xc0 && zLine[index]!=0 ){  /* Any UTF-8 lead byte 11xxxxxx */
      int cchUTF8=1; /* Code units consumed. */
      int maxUTF8=1; /* Expected sequence length. */
      zBuf[iBuf++]=c;
      if( (c&0xe0)==0xc0 )maxUTF8=2;          /* UTF-8 lead byte 110vvvvv */
      else if( (c&0xf0)==0xe0 )maxUTF8=3;     /* UTF-8 lead byte 1110vvvv */
      else if( (c&0xf8)==0xf0 )maxUTF8=4;     /* UTF-8 lead byte 11110vvv */
      while( cchUTF8<maxUTF8 &&
              (zLine[index]&0xc0)==0x80 ){    /* UTF-8 trail byte 10vvvvvv */
        cchUTF8++;
        zBuf[iBuf++] = zLine[index++];
      }
      maxChars--;
    }else{
      zBuf[iBuf++] = c;
      maxChars -= useChars;
    }
    if( maxChars<=0 ) break;
    if( c=='\n' ) break;
  }
  if( charCnt>0 ){
    zBuf[iBuf++] = '\n';
    lineCnt++;
  }







<
>
|
<
<
<
<
<
<
<
|
|
<
|
|
|
|
|
|
|
|
<
<
<
|
<







290
291
292
293
294
295
296

297
298







299
300

301
302
303
304
305
306
307
308



309

310
311
312
313
314
315
316
        break;
      }
      charCnt++;
    }else{
      charCnt++;
    }
    assert( c!='\n' || charCnt==0 );

    zBuf[iBuf++] = c;
    /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */







    cchUTF8=1; /* Code units consumed. */
    maxUTF8=1; /* Expected sequence length. */

    if( (c&0xe0)==0xc0 )maxUTF8=2;          /* UTF-8 lead byte 110vvvvv */
    else if( (c&0xf0)==0xe0 )maxUTF8=3;     /* UTF-8 lead byte 1110vvvv */
    else if( (c&0xf8)==0xf0 )maxUTF8=4;     /* UTF-8 lead byte 11110vvv */
    while( cchUTF8<maxUTF8 &&
            (zLine[index]&0xc0)==0x80 ){    /* UTF-8 trail byte 10vvvvvv */
      cchUTF8++;
      zBuf[iBuf++] = zLine[index++];
    }



    maxChars -= useChars;

    if( maxChars<=0 ) break;
    if( c=='\n' ) break;
  }
  if( charCnt>0 ){
    zBuf[iBuf++] = '\n';
    lineCnt++;
  }
360
361
362
363
364
365
366

367
368
369
370
371
372
373
){
  int maxChars = width - indent;
  int si, sk, i, k, kc;
  int doIndent = 0;
  char *zBuf;
  char zBuffer[400];
  int lineCnt = 0;


  if( width<0 ){
    comment_set_maxchars(indent, &maxChars);
  }
  if( zText==0 ) zText = "(NULL)";
  if( maxChars<=0 ){
    maxChars = strlen(zText);







>







346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
){
  int maxChars = width - indent;
  int si, sk, i, k, kc;
  int doIndent = 0;
  char *zBuf;
  char zBuffer[400];
  int lineCnt = 0;
  int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */

  if( width<0 ){
    comment_set_maxchars(indent, &maxChars);
  }
  if( zText==0 ) zText = "(NULL)";
  if( maxChars<=0 ){
    maxChars = strlen(zText);
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406

407
408
409
410
411
412
413
414
415
416
      }
      if( zBuf!=zBuffer) fossil_free(zBuf);
      return lineCnt;
    }
    for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
      char c = zText[i];
      kc++; /* Count complete UTF-8 sequences. */
      /*
      ** Avoid line breaks inside UTF-8 sequences. Incomplete, ill-formed and
      ** overlong sequences are kept together. The invalid lead bytes 0xC0 to
      ** 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and
      ** 4-byte sequences, respectively, the other invalid lead bytes 0xF8 to
      ** 0xFF are treated as invalid 1-byte sequences (as lone trail bytes).
      */
      if( (c&0xc0)==0xc0 && zText[i+1]!=0 ){  /* Any UTF-8 lead byte 11xxxxxx */
        int cchUTF8=1; /* Code units consumed. */
        int maxUTF8=1; /* Expected sequence length. */
        if( (c&0xe0)==0xc0 )maxUTF8=2;        /* UTF-8 lead byte 110vvvvv */
        else if( (c&0xf0)==0xe0 )maxUTF8=3;   /* UTF-8 lead byte 1110vvvv */
        else if( (c&0xf8)==0xf0 )maxUTF8=4;   /* UTF-8 lead byte 11110vvv */

        zBuf[k++] = c;
        while( cchUTF8<maxUTF8 &&
                (zText[i+1]&0xc0)==0x80 ){    /* UTF-8 trail byte 10vvvvvv */
          cchUTF8++;
          zBuf[k++] = zText[++i];
        }
      }
      else if( fossil_isspace(c) ){
        si = i;
        sk = k;







<
|
<
<
<
<
<
<
|
|
|
|
|
>


|







374
375
376
377
378
379
380

381






382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
      }
      if( zBuf!=zBuffer) fossil_free(zBuf);
      return lineCnt;
    }
    for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
      char c = zText[i];
      kc++; /* Count complete UTF-8 sequences. */

      /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */






      cchUTF8=1; /* Code units consumed. */
      maxUTF8=1; /* Expected sequence length. */
      if( (c&0xe0)==0xc0 )maxUTF8=2;        /* UTF-8 lead byte 110vvvvv */
      else if( (c&0xf0)==0xe0 )maxUTF8=3;   /* UTF-8 lead byte 1110vvvv */
      else if( (c&0xf8)==0xf0 )maxUTF8=4;   /* UTF-8 lead byte 11110vvv */
      if( maxUTF8>1 ){
        zBuf[k++] = c;
        while( cchUTF8<maxUTF8 &&
                (zText[i+1]&0xc0)==0x80 ){  /* UTF-8 trail byte 10vvvvvv */
          cchUTF8++;
          zBuf[k++] = zText[++i];
        }
      }
      else if( fossil_isspace(c) ){
        si = i;
        sk = k;