Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Fix [a814ee5bbd] - string compare/equal -length for long strings |
|---|---|
| Timelines: | family | ancestors | descendants | both | trunk | main |
| Files: | files | file ages | folders |
| SHA3-256: |
e831429a4d9cbc8a54833891c0d00b6e |
| User & Date: | apnadkarni 2023-04-24 16:28:21.363 |
Context
|
2023-05-02
| ||
| 16:43 | Merge 9.0 check-in: d66ed253e8 user: jan.nijtmans tags: tip-626 | |
|
2023-04-24
| ||
| 16:49 | Fix [9369f83649] - binary format/scan for big data check-in: 06ceffb2f3 user: apnadkarni tags: trunk, main | |
| 16:28 | Fix [a814ee5bbd] - string compare/equal -length for long strings check-in: e831429a4d user: apnadkarni tags: trunk, main | |
| 15:51 | Adapt signature of TclCompileScript() to fit better after TIP 660. check-in: 815e08b6d1 user: dgp tags: trunk, main | |
Changes
Changes to generic/tclCmdMZ.c.
| ︙ | ︙ | |||
2638 2639 2640 2641 2642 2643 2644 |
/*
* Remember to keep code here in some sync with the byte-compiled versions
* in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
const char *string2;
| | | | | 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 |
/*
* Remember to keep code here in some sync with the byte-compiled versions
* in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
const char *string2;
int i, match, nocase = 0;
Tcl_Size length, reqlength = -1;
if (objc < 3 || objc > 6) {
str_cmp_args:
Tcl_WrongNumArgs(interp, 1, objv,
"?-nocase? ?-length int? string1 string2");
return TCL_ERROR;
}
for (i = 1; i < objc-2; i++) {
string2 = Tcl_GetStringFromObj(objv[i], &length);
if ((length > 1) && !strncmp(string2, "-nocase", length)) {
nocase = 1;
} else if ((length > 1)
&& !strncmp(string2, "-length", length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
i++;
if (TclGetSizeIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
return TCL_ERROR;
}
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"bad option \"%s\": must be -nocase or -length",
string2));
Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option",
|
| ︙ | ︙ | |||
2713 2714 2715 2716 2717 2718 2719 |
{
/*
* Remember to keep code here in some sync with the byte-compiled versions
* in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
| | > | | 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 |
{
/*
* Remember to keep code here in some sync with the byte-compiled versions
* in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
int match, nocase, status;
Tcl_Size reqlength;
status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength);
if (status != TCL_OK) {
return status;
}
objv += objc-2;
match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength);
Tcl_SetObjResult(interp, Tcl_NewWideIntObj(match));
return TCL_OK;
}
int
TclStringCmpOpts(
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[], /* Argument objects. */
int *nocase,
Tcl_Size *reqlength)
{
int i;
Tcl_Size length;
const char *string;
*reqlength = -1;
*nocase = 0;
|
| ︙ | ︙ | |||
2757 2758 2759 2760 2761 2762 2763 |
*nocase = 1;
} else if ((length > 1)
&& !strncmp(string, "-length", length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
i++;
| | | 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 |
*nocase = 1;
} else if ((length > 1)
&& !strncmp(string, "-length", length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
i++;
if (TclGetSizeIntFromObj(interp, objv[i], reqlength) != TCL_OK) {
return TCL_ERROR;
}
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"bad option \"%s\": must be -nocase or -length",
string));
Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option",
|
| ︙ | ︙ |
Changes to generic/tclInt.h.
| ︙ | ︙ | |||
3382 3383 3384 3385 3386 3387 3388 | MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, Tcl_Size numBytes); typedef int (*memCmpFn_t)(const void*, const void*, size_t); MODULE_SCOPE int TclStringCmp(Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, int checkEq, int nocase, Tcl_Size reqlength); MODULE_SCOPE int TclStringCmpOpts(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], int *nocase, | | | 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 | MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, Tcl_Size numBytes); typedef int (*memCmpFn_t)(const void*, const void*, size_t); MODULE_SCOPE int TclStringCmp(Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, int checkEq, int nocase, Tcl_Size reqlength); MODULE_SCOPE int TclStringCmpOpts(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], int *nocase, Tcl_Size *reqlength); MODULE_SCOPE int TclStringMatch(const char *str, Tcl_Size strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, Tcl_Obj *patternObj, int flags); MODULE_SCOPE void TclSubstCompile(Tcl_Interp *interp, const char *bytes, Tcl_Size numBytes, int flags, Tcl_Size line, struct CompileEnv *envPtr); |
| ︙ | ︙ |
Changes to generic/tclStringObj.c.
| ︙ | ︙ | |||
3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 |
int empty, match;
Tcl_Size length, s1len = 0, s2len = 0;
memCmpFn_t memCmpFn;
if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
/*
* Always match at 0 chars of if it is the same obj.
*/
match = 0;
} else {
if (!nocase && TclIsPureByteArray(value1Ptr)
&& TclIsPureByteArray(value2Ptr)) {
/*
* Use binary versions of comparisons since that won't cause undue
| > | 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 |
int empty, match;
Tcl_Size length, s1len = 0, s2len = 0;
memCmpFn_t memCmpFn;
if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
/*
* Always match at 0 chars of if it is the same obj.
* Note: as documented reqlength negative means it is ignored
*/
match = 0;
} else {
if (!nocase && TclIsPureByteArray(value1Ptr)
&& TclIsPureByteArray(value2Ptr)) {
/*
* Use binary versions of comparisons since that won't cause undue
|
| ︙ | ︙ | |||
3575 3576 3577 3578 3579 3580 3581 |
#else
checkEq
#endif
) {
memCmpFn = memcmp;
s1len *= sizeof(Tcl_UniChar);
s2len *= sizeof(Tcl_UniChar);
| | | 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 |
#else
checkEq
#endif
) {
memCmpFn = memcmp;
s1len *= sizeof(Tcl_UniChar);
s2len *= sizeof(Tcl_UniChar);
if (reqlength > 0) {
reqlength *= sizeof(Tcl_UniChar);
}
} else {
memCmpFn = (memCmpFn_t) TclUniCharNcmp;
}
}
}
|
| ︙ | ︙ | |||
3619 3620 3621 3622 3623 3624 3625 |
match = 0;
goto matchdone;
}
} else {
s1 = Tcl_GetStringFromObj(value1Ptr, &s1len);
s2 = Tcl_GetStringFromObj(value2Ptr, &s2len);
}
| | | | | | 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 |
match = 0;
goto matchdone;
}
} else {
s1 = Tcl_GetStringFromObj(value1Ptr, &s1len);
s2 = Tcl_GetStringFromObj(value2Ptr, &s2len);
}
if (!nocase && checkEq && reqlength < 0) {
/*
* When we have equal-length we can check only for
* (in)equality. We can use memcmp in all (n)eq cases because
* we don't need to worry about lexical LE/BE variance.
*/
memCmpFn = memcmp;
} else {
/*
* As a catch-all we will work with UTF-8. We cannot use
* memcmp() as that is unsafe with any string containing NUL
* (\xC0\x80 in Tcl's utf rep). We can use the more efficient
* TclpUtfNcmp2 if we are case-sensitive and no specific
* length was requested.
*/
if ((reqlength < 0) && !nocase) {
memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
} else {
s1len = Tcl_NumUtfChars(s1, s1len);
s2len = Tcl_NumUtfChars(s2, s2len);
memCmpFn = (memCmpFn_t)
(nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
}
}
}
/* At this point s1len, s2len, and reqlength should by now have been
* adjusted so that they are all in the units expected by the selected
* comparison function.
*/
length = (s1len < s2len) ? s1len : s2len;
if (reqlength < 0) {
/*
* The requested length is negative, so ignore it by setting it
* to length + 1 to correct the match var.
*/
reqlength = length + 1;
} else if (reqlength > 0 && reqlength < length) {
length = reqlength;
}
if (checkEq && reqlength < 0 && (s1len != s2len)) {
match = 1; /* This will be reversed below. */
} else {
/*
* The comparison function should compare up to the minimum byte
* length only.
*/
|
| ︙ | ︙ |
Changes to tests/bigdata.test.
| ︙ | ︙ | |||
135 136 137 138 139 140 141 |
}
#
# string compare/equal
bigtestRO string-equal/compare-bigdata-1 "string compare/equal equal strings" {0 1} -body {
list [string compare $s1 $s2] [string equal $s1 $s2]
} -setup {
| < | | | | < | | > | | 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
}
#
# string compare/equal
bigtestRO string-equal/compare-bigdata-1 "string compare/equal equal strings" {0 1} -body {
list [string compare $s1 $s2] [string equal $s1 $s2]
} -setup {
set s1 [bigString 0x100000000]
set s2 [bigString 0x100000000]; # Separate so Tcl_Obj is not the same
} -cleanup {
bigClean
}
bigtestRO string-equal/compare-bigdata-2 "string compare/equal -length unequal strings" {-1 0 0 1} -body {
# Also tests lengths do not wrap
set result {}
lappend result [string compare $s1 $s2]
lappend result [string equal $s1 $s2]
# Check lengths > UINT_MAX
# Also that lengths do not truncate to sizeof(int)
lappend result [string compare -length 0x100000000 $s1 $s2]
lappend result [string equal -length 0x100000000 $s1 $s2]
} -setup {
set s1 [bigString 0x100000001]
set s2 [bigString 0x100000001 0x100000000]; # Differs in last char
} -cleanup {
bigClean
}
# -constraints bug-a814ee5bbd
#
# string first
bigtestRO string-first-bigdata-1 "string first > INT_MAX" {2147483648 -1 2147483650 1} -body {
list \
[string first X $s] \
[string first Y $s] \
|
| ︙ | ︙ |