Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | merge 8.5 |
|---|---|
| Timelines: | family | ancestors | descendants | both | core-8-6-branch |
| Files: | files | file ages | folders |
| SHA3-256: |
62362d0caadda237b39c64cd152badb1 |
| User & Date: | dgp 2020-05-06 21:42:47.884 |
Context
|
2020-05-07
| ||
| 10:09 | Optimize Tcl_UtfToUniCharDString() check-in: 806e1e868c user: jan.nijtmans tags: core-8-6-branch | |
|
2020-05-06
| ||
| 21:52 | merge 8.6 check-in: 4d08cde908 user: dgp tags: core-8-branch | |
| 21:42 | merge 8.5 check-in: 62362d0caa user: dgp tags: core-8-6-branch | |
| 21:08 | Tighten optimization in Tcl_NumUtfChars. Explain in comments. check-in: dabb52db36 user: dgp tags: core-8-5-branch | |
| 19:31 | merge 8.5 check-in: 01956c0799 user: dgp tags: core-8-6-branch | |
Changes
Changes to generic/tclUtf.c.
| ︙ | ︙ | |||
584 585 586 587 588 589 590 |
*
*---------------------------------------------------------------------------
*/
int
Tcl_NumUtfChars(
const char *src, /* The UTF-8 string to measure. */
| | | < < < < < < < > | < > > > | > > > > > > > > > | > | > > > > | 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 |
*
*---------------------------------------------------------------------------
*/
int
Tcl_NumUtfChars(
    const char *src,            /* The UTF-8 string to measure. */
    int length)                 /* The length of the string in bytes, or -1
                                 * for strlen(string). */
{
    Tcl_UniChar ch = 0;         /* Scratch destination for decoded chars. */
    int i = 0;                  /* Running character count (the result). */

    if (length < 0) {
        /*
         * String is NUL-terminated, so TclUtfToUniChar calls are safe.
         * Counting stops at INT_MAX so that i cannot overflow.
         */

        while ((*src != '\0') && (i < INT_MAX)) {
            src += TclUtfToUniChar(src, &ch);
            i++;
        }
    } else {
        /* Will return value between 0 and length. No overflow checks. */

        /* Pointer to the end of string. Never read endPtr[0] */
        const char *endPtr = src + length;

        /*
         * While at least TCL_UTF_MAX bytes remain, the call to
         * Tcl_UtfCharComplete(src, endPtr - src) is optimized away; the
         * rationale carried over from the previous form of this loop is
         * that the call is known to return 1 for such a length. Only in
         * the short tail of the string does the call actually happen.
         *
         * The test is written on the remaining byte count rather than on
         * a precomputed "endPtr - TCL_UTF_MAX + 1" pointer, because that
         * pointer would lie before the start of the buffer whenever
         * length < TCL_UTF_MAX - 1, and forming such a pointer is
         * undefined behaviour in C.
         */

        while (((endPtr - src) >= TCL_UTF_MAX)
                || ((src < endPtr)
                    && Tcl_UtfCharComplete(src, endPtr - src))) {
#if TCL_UTF_MAX < 4
            if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
                /* treat F0 - F4 as single character */
                ch = 0;
                src++;
            } else
#endif
            {
                src += TclUtfToUniChar(src, &ch);
            }
            i++;
        }
        if (src < endPtr) {
            /*
             * String ends in an incomplete UTF-8 sequence.
             * Count every byte in it.
             */

            i += endPtr - src;
        }
    }
    return i;
}
/*
|
| ︙ | ︙ |