Artifact f833292dea48475f19080f2bdc6f4122bd926d48:
- File
packages/string/lib/string.test.tcl
— part of check-in
[0aed25aabc]
at
2021-06-19 10:04:37
on branch trunk
— list
add flatten
fix struncate
ycl string encode
further development on related test
(user: pooryorick size: 22360)
#! /bin/env tclsh package require {ycl test} proc suite_main {} { global auto_path package require {ycl proc} [yclprefix] proc alias alias [yclprefix] proc alias alias aliases [yclprefix] proc aliases aliases { {ycl list} { sl } {ycl ns local} { rename } {ycl test} { cleanup1 data } } [yclprefix] test init rename [namespace current]::test {} aliases { {ycl test} { test } } package require {ycl test data} lappend setup0 [list set auto_path $auto_path] lappend setup0 { package require {ycl proc} [yclprefix] proc alias alias [yclprefix] proc alias alias aliases [yclprefix] proc aliases package require {ycl string} alias ycl [yclprefix] aliases { {ycl math} { expr } {ycl list} { product sl zip } {ycl string} { cmp dedent doublequote regsub shortmatch delimit encode decode isnumeric isdecimal ltree macro mreplace prefix printable regsplit reverse split suffix template to trim valid validate } {ycl string printable} {} {ycl test} { data } {ycl test data} } proc checkeval {options text} { expr {[printable [eval lindex $text] {*}$options] eq $text} } set quote1 "the good is oft interred with their bones" set text1 " a\n b\\n c\x03 d\a jääär \$ \[ " } set setup0 [join $setup0 \n] set setup2 [join [list $setup0 { package require {ycl test data} aliases { {ycl test} { data } } }] \n] lappend setup3 $setup2 lappend setup3 { data unichars set unicharcount [llength $unichars] set successmap {65536 0 1114112 1} if {![dict exists $successmap $unicharcount]} { error [list {unknown unicode character count} $unicharcount] } set expectedidx [dict get $successmap $unicharcount] set expected { ascii 127 big5 13864 cesu-8 {65536 1114112} cp1250 256 cp1251 256 cp1252 256 cp1253 253 cp1254 256 cp1255 245 cp1256 256 cp1257 254 cp1258 256 cp437 256 cp737 256 cp775 256 cp850 256 cp852 256 cp855 256 cp857 253 cp860 256 cp861 256 cp862 256 cp863 256 cp864 253 cp865 256 cp866 256 cp869 256 cp874 248 cp932 7520 cp936 21920 cp949 17177 cp950 13653 dingbats 253 ebcdic 256 euc-cn 7604 euc-jp 7101 euc-kr 8384 gb12345 7548 gb1988 223 gb2312 7604 gb2312-raw 7445 iso2022 18819 iso2022-jp 18814 iso2022-kr 8446 iso8859-1 256 iso8859-10 256 iso8859-13 256 iso8859-14 256 iso8859-15 256 iso8859-16 256 iso8859-2 256 iso8859-3 249 iso8859-4 256 iso8859-5 256 iso8859-6 211 iso8859-7 250 iso8859-8 220 iso8859-9 256 jis0201 223 jis0208 6880 jis0212 6068 koi8-r 256 koi8-u 256 ksc5601 8225 macCentEuro 256 macCroatian 256 macCyrillic 256 macDingbats 253 macGreek 256 macIceland 256 macJapan 7385 macRoman 256 macRomania 256 macThai 251 macTurkish 256 macUkraine 256 shiftjis 7074 symbol 249 tis-620 214 ucs-2 {65536 1114112} ucs-2be {65536 1114112} ucs-2le {65536 1114112} unicode {65536 1114112} utf-16 {65536 1114112} utf-16be {65536 1114112} utf-16le {65536 1114112} utf-8 {65536 1114112} } } set setup3 [join $setup3 \n] set setup4 $setup0 append setup4 { set chars {0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v} } test {base encode 0} {} -setup $setup4 -body { set value e set encoded $value ycl string base encode encoded $chars set decoded $encoded ycl string base decode decoded $chars return $decoded } -result [sl { e }] test {base encode 1} {} -setup $setup4 -body { set value ze set encoded $value ycl string base encode encoded $chars set decoded $encoded ycl string base decode decoded $chars return $decoded } -result [sl { ze }] test {base encode 2} {} -setup $setup4 -body { set value zea set encoded $value ycl string base encode encoded $chars set decoded $encoded ycl string base decode decoded $chars return $decoded } -result [sl { zea }] test {base encode 3} {} -setup $setup4 -body { set value [encoding convertto utf-8 [data bytes]] set encoded $value ycl string base encode encoded $chars puts [list now here] set decoded $encoded ycl string base decode decoded $chars expr {$decoded eq $value} } -result [sl { 1 }] test {base encode bitcoin} {} -setup $setup4 -body { set res {} set encoders [sl { {ycl string {base encode mod bignum}} #{ycl string {base encode mod}} }] set decoders [sl { {ycl string {base decode mod bignum}} #{ycl string {base decode mod}} }] set charsets [sl { [list bitcoin [ycl string {base 58 character set bitcoin}]] [list ripple [ycl string {base 58 character set ripple}]] [list flickr [ycl string {base 58 character set flickr}]] }] set lists [list $encoders $decoders $charsets] product lists foreach {value standard} [sl { \0 1 \0\0 11 \0\0[]1 11r {Hello World!} {2NEpo7TZRRrLZSi2U} {The quick brown fox jumps over the lazy dog.} USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z \x00\x00\x28\x7f\xb4\xcd 11233QC4 [string range [data bytes] 0 100] {} #[string range [data bytes] 0 1000] #{} }] { foreach set $lists { set encoded $value lassign $set encoder decoder charset lassign $charset setname charset {*}$encoder encoded $charset if {$standard ne {}} { if {$setname ne {bitcoin}} { set set1 [ycl string {base 58 character set bitcoin}] set set2 [ycl string [ list base 58 character set $setname]] zip set2 set1 set mapped [string map $set2 $encoded] } else { set mapped $encoded } expr res1 {$mapped eq $standard} if {!$res1} { lappend res \n[list incorrect encoding $encoder $decoder $charset [printable $standard] [printable $encoded]] } } set decoded $encoded {*}$decoder decoded $charset expr res1 {$decoded eq $value} if {!$res1} { lappend res [list incorrect decoding $encoder $decoder $charset [printable $decoded] [printable $value]] } } } lappend res done return $res } -result [sl { done }] test {base encode bad input} {} -setup $setup4 -body { data unichars value set encoded $value catch { ycl string base encode encoded $chars } cres copts return $cres } -result [sl { {value is not bytes} position 536 }] test cmp1 {} -setup $setup0 -body { cmp hello hello } -result -1 test cmp2 {} -setup $setup0 -body { cmp hello heplo } -result 2 test cmp3 {} -setup $setup0 -body { cmp hello pello } -result 0 test cmp4 {} -setup $setup0 -body { cmp hello hellp } -result 4 test cmp5 {} -setup $setup0 -body { cmp {} {} } -result -1 test cmp6 {} -setup $setup0 -body { cmp {} hello } -result 0 test cmp7 {} -setup $setup0 -body { cmp hello {} } -result 0 test cmp8 {} -setup $setup0 -body { cmp horton horton } -result -1 test cmp9 {} -setup $setup0 -body { cmp horpon horton } -result 3 test cmp10 {} -setup $setup0 -body { cmp horpop horton } -result 3 test cmp11 {} -setup $setup0 -body { cmp horton hortoo } -result 5 test cmp12 {} -setup $setup0 -body { cmp o {} } -result 0 test cmp13 {} -setup $setup0 -body { cmp on on } -result -1 test cmp13 {} -setup $setup0 -body { cmp on on } -result -1 test dedent {} -setup $setup2 -body { set res [data indented1] dedent res return $res } -cleanup [cleanup1] -result { snode1 node1.1 node1.2 node2 node1.2 node3 node1.2 } test delimit_no_such_delimiter {} -setup $setup0 -body { delimit $quote1 string z } -result {{0 40}} test delimit_no_such_delimiter_strings {} -setup $setup0 -body { set quote2 [delimit $quote1 format strings string z] expr {$quote2 eq [list $quote1]} } -result 1 test delimit_into_zero {} -setup $setup0 -body { delimit $quote1 string od string h string rr into 0 } -result {{0 40}} test delimit_into_two {} -setup $setup0 -body { delimit $quote1 string od string h string rr into 2 } -result {{0 0} {1 1} {2 40}} test delimit_into_two_b {} -setup $setup0 -body { delimit $quote1 string od into 2 } -result {{0 5} {6 7} {8 40}} test delimit_multiple {} -setup $setup0 -body { delimit $quote1 \ string od string h string rr } -result {{0 0} {1 1} {2 5} {6 7} {8 19} {20 21} {22 27} {28 28} {29 30} {31 31} {32 40}} test delimit_delimiter_at_end {} -setup $setup0 -body { delimit $quote1 string s } -result {{0 9} {10 10} {11 39} {40 40}} test delimit_match {} -setup $setup0 -body { delimit $quote1 match of* } -result {{0 11} {12 13} {14 40}} test delimit_match_strings {} -setup $setup0 -body { delimit $quote1 match of* format strings } -result {{the good is } of {t interred with their bones}} test delimit_match_info {} -setup $setup0 -body { delimit $quote1 match of* format info } -result {{first 0 last 11 type unmatched spec {}} {first 12 last 13 type match spec of*} {first 14 last 40 type unmatched spec {}}} test delimit_match_info_strings {} -setup $setup0 -body { delimit $quote1 match of* format {info strings} } -result {{first 0 last 11 type unmatched spec {} string {the good is }} {first 12 last 13 type match spec of* string of} {first 14 last 40 type unmatched spec {} string {t interred with their bones}}} test delimit_re {} -setup $setup0 -body { delimit $quote1 re of } -result {{0 11} {12 13} {14 40}} test delimit_re2 {} -setup $setup0 -body { delimit $quote1 re {(.{8})} } -result {{0 7} {8 15} {16 23} {24 31} {32 39} {40 40}} test delimit_re3 {} -setup $setup0 -body { delimit ennui re {nn+} format strings } -result {e nn ui} test delimit_re_multi {} -setup $setup0 -body { delimit $quote1 re {t } re he } -result {{0 0} {1 2} {3 13} {14 15} {16 30} {31 32} {33 40}} test delimit_re_multi2 {} -setup $setup0 -body { delimit $quote1 format strings re {t } re he string bo } -result {t he { good is of} {t } {interred with t} he {ir } bo nes} test delimit_re_multi_shortest {} -setup $setup0 -body { delimit $quote1 re {^t i} re ^he \ string {t } } -result {{0 0} {1 2} {3 13} {14 15} {16 30} {31 32} {33 40}} test delimit_count {} -setup $setup0 -body { delimit $quote1 string {d } format count } -result 2 test delimit_count_zero {} -setup $setup0 -body { delimit $quote1 string {xzd } format count } -result 0 test delimit_empty_input {} -setup $setup0 -body { delimit {} string z } -result {} test doublequote {} -setup $setup0 -body { set string {"hello", "and goodybe"} doublequote string return $string } -result {"\"hello\", \"and goodybe\""} test encodedecode {} -setup $setup3 -body { if 0 { some versions of tcl-8.6 did not successfully perform round-trip encoding and decoding of surrogate characters in the "unicode" encoding. For those versions of Tcl the number of characters successfully encoded and decoded is 1113088 rather than 1114112. } set encodings [lsort [lmap encoding [encoding names] { # tis-620 map isn't available for some reason if {$encoding ne {identity tis-620}} { set encoding } }]] #set encodings1 {utf-8 unicode utf-16 utf-16be utf-16le} #set encodings1 ascii #set encodings1 cesu-8 set encodings1 $encodings foreach encoding $encodings { if {$encoding ni $encodings1} { # {to do} when Tcl is performant enough, go ahead a check # all encodings continue } puts stderr [list test encodedecode encoding $encoding] set encodeerrors 0 set decodeerrors 0 set numencoded 0 set idx 0 set decodefailranges {} set decodefailrangestart -1 set decodefailrangeend -1 set encodefailranges {} set encodefailrangestart -1 set encodefailrangeend -1 foreach char $unichars { set encoded $char set failed 0 encode encoded $encoding if {[info exists encoded]} { incr numencoded set back $encoded decode back $encoding if {[info exists back]} { if {$back ne $char} { set failed 1 incr decodeerrors } } else { set failed 1 incr decodeerrors } if {$failed} { scan $char %c ord if {$ord > $decodefailrangeend + 1 && $decodefailrangeend >= 0} { lappend decodefailranges [list [ format %x $decodefailrangestart] [ format %x $decodefailrangeend]] set decodefailrangestart $ord set decodefailrangeend $ord } if {$decodefailrangestart == -1} { set decodefailrangestart $ord } else { set decodefailrangeend $ord } } } else { incr encodeerrors scan $char %c ord if {$ord > $encodefailrangeend + 1 && $encodefailrangeend >= 0} { lappend encodefailranges [list [ format %x $encodefailrangestart] [ format %x $encodefailrangeend]] set encodefailrangestart $ord set encodefailrangeend $ord } if {$encodefailrangestart == -1} { set encodefailrangestart $ord } else { set encodefailrangeend $ord } } incr idx } if {$encodefailrangestart >= 0} { lappend encodefailranges [ list [format %x $encodefailrangestart] [ format %x $encodefailrangeend]] } if {$decodefailrangestart >= 0} { lappend decodefailranges [ list [format %x $decodefailrangestart] [ format %x $decodefailrangeend]] } set expected1 [dict get $expected $encoding] if {[llength $expected1] > $expectedidx} { set expected1 [lindex $expected1 $expectedidx] } if {$numencoded != $expected1} { set diff [expr {$numencoded - $expected1}] lappend res $encoding [ list expected $expected1 vs $numencoded diff $diff] } if {$decodeerrors} { lappend res $encoding [list {decode errors} $decodeerrors \ {failed ranges} $decodefailranges] } if {$encodeerrors} { lappend res $encoding [list {encode errors} $encodeerrors \ {failed ranges} $encodefailranges] } } if {![info exists res]} { lappend res success } return $res } -cleanup [cleanup1] -result [sl { success }] test iter {} -setup $setup0 -body { set string banana set iter [ycl string iter $string] while 1 { lappend res [$iter] } return $res } -cleanup [cleanup1] -result [sl { b a n a n a }] test isdecimal {} -setup $setup2 -body { foreach {key val} [lsort -dictionary -stride 2 [data values]] { set val2 $val isdecimal val2 lappend res $val $val2 } return $res } -cleanup [cleanup1] -result [sl { hello {} { 012.038900 } 012.038900 { 15520 } 15520 { 0x15520 } {} { Inf } Inf { 5.2e } {} { 0xhello } {} { - } {} { NaN } NaN { -0009388.3333e+67 } -0009388.3333e+67 { -00099930 } -99930 { 0o00730} {} { True } {} }] test isnumeric {} -setup $setup2 -body { foreach {key val} [lsort -dictionary -stride 2 [data values]] { lappend res $val [isnumeric $val] } return $res } -cleanup [cleanup1] -result [sl { hello {} { 012.038900 } 012.038900 { 15520 } 15520 { 0x15520 } 0x15520 { Inf } Inf { 5.2e } {} { 0xhello } {} { - } {} { NaN } NaN { -0009388.3333e+67 } -0009388.3333e+67 { -00099930 } -99930 { 0o00730} 0o00730 { True } {} }] test macro {} -setup $setup0 -body { set var1 zeroes set string [string trim { one \u5b\u40 two [three four] $var1 \$var2 [@m1 @ six [@m2 seven eight@] ${var1}nine[@m2 ten eleven@]twelve@] thirteen }] proc m1 args { return "five [join $args]" } proc m2 args { return "seven $args" } macro string [sl { m1 [namespace current]::m1 m2 [namespace current]::m2 }] lappend res $string return $res } -cleanup [cleanup1] -result [sl { {one \u5b\u40 two [three four] $var1 \$var2 five @ six seven seven\ eight zeroesnineseven ten eleventwelve thirteen} }] test mreplace {} -setup $setup0 -body { set string {one two three four five six seven eight nine} set replacements { {0 0} p {4 4} l {8 13} {} {14 14} m } mreplace string {*}$replacements lappend res $string return $res } -cleanup [cleanup1] -result [sl { {pne lwo mour five six seven eight nine} }] test printable_eval {} -setup $setup0 -body { set a [printable "one;\x08 \$tw\x00o \[three\]\t \" \{four\}"] eval set b $a return [printable $b] } -cleanup [cleanup1] -result \ {one\;\b\ \$tw\x00o\ \[three]\t\ \"\ \{four\}} test regsub {} -setup $setup0 -body { set string {hello world} regsub string l p lappend res $string set string one-two # hyphen shouldn't be treated as an argument regsub string - _ lappend res $string return $res } -cleanup [cleanup1] -result [sl { {heplo world} one_two }] test reverse {} -setup $setup0 -body { set string hello reverse string lappend res $string return $res } -cleanup [cleanup1] -result [sl { olleh }] test shortmatch_one_match {} -setup $setup0 -body { shortmatch *t* t } -result {0} test shortmatch_one_nomatch {} -setup $setup0 -body { shortmatch *is* t } -result {-1} test shortmatch_two {} -setup $setup0 -body { shortmatch *is* hi } -result {-1} test shortmatch_even {} -setup $setup0 -body { shortmatch *is* establishment } -result {7} test shortmatch_odd {} -setup $setup0 -body { shortmatch *is* establishments } -result {7} test shortmatch_nomatch {} -setup $setup0 -body { shortmatch *x* establishments } -result {-1} test split {} -setup $setup0 -body { set var1 {one {two three} four} split var1 { } return $var1 } -result {one \{two three\} four} test regsplit {} -setup $setup0 -body { set var1 : regsplit : var1 lappend res $var1 set var1 :: regsplit : var1 lappend res $var1 set var1 :one regsplit : var1 lappend res $var1 set var1 one: regsplit : var1 lappend res $var1 set var1 {:::one::two:::four::::} regsplit {:::*} var1 lappend res $var1 set var1 {one.two;three,four five;;;six} regsplit {\.|;|;;;*|,|\s} var1 lappend res $var1 return $res } -result [sl { {{} : {}} {{} : {} : {}} {{} : one} {one : {}} {{} ::: one :: two ::: four :::: {}} {one . two {;} three , four { } five {;;;} six} }] test printable {} -setup $setup0 -body { set commands [sl { default {} beyondascii {ascii false} notclescapes {tclescapes 0} other {other {a c}} }] foreach {label options} $commands { set printable [printable $text1 {*}$options] lappend res $label $printable lappend res [checkeval $options $printable] } lappend res tclsubs [printable $text1 tcl 0 tclsubs 1] lappend res notcl [printable $text1 tcl 0] lappend res {no tcl notclescapes} [printable $text1 tcl 0 tclescapes 0] return $res } -cleanup [cleanup1] -result [sl { default {\ a\n\ b\x5cn\ c\x03\ d\a\ j\xe4\xe4\xe4r\ \$\ \[\ } 1 beyondascii {\ a\n\ b\x5cn\ c\x03\ d\a\ jääär\ \$\ \[\ } 1 notclescapes {\x20a\x0a\x20b\x5cn\x20c\x03\x20d\x07\x20j\xe4\xe4\xe4r\x20\x24\x20\x5b\x20} 1 other {\ \x61\n\ b\x5cn\ \x63\x03\ d\a\ j\xe4\xe4\xe4r\ \$\ \[\ } 1 tclsubs " a\n b\\x5cn c\\x03 d\\a j\\xe4\\xe4\\xe4r \\$ \\\[ " notcl " a\n b\\x5cn c\\x03 d\\a j\\xe4\\xe4\\xe4r $ \[ " {no tcl notclescapes} " a\n b\\x5cn c\\x03 d\\x07 j\\xe4\\xe4\\xe4r $ \[ " }] test prefix {} -setup $setup0 -body { set res {} set value {one two three four five} set suffix { four five} set res1 $value prefix res1 suffix lappend res $res1 set suffix {one two three four five} set res1 $value prefix res1 suffix lappend res $res1 set suffix {} set res1 $value prefix res1 suffix lappend res $res1 return $res } -cleanup [cleanup1] -result [sl { {one two three} {} {one two three four five} }] test suffix {} -setup $setup0 -body { set value {one two three four five} set res1 $value set prefix {one two} suffix res1 prefix lappend res $res1 set prefix {one two three four five} set res1 $value suffix res1 prefix lappend res $res1 set prefix {} set res1 $value suffix res1 prefix lappend res $res1 return $res } -cleanup [cleanup1] -result [sl { { three four five} {} {one two three four five} }] test template {} -setup $setup0 -body { set a 5 set b 7 set expr {[set b]} apply [list {} [template a b # expr { set a @a@ set b @b@ expr {$a + @expr@} }]] } -cleanup [cleanup1] -result 12 test template_customdelim {} -setup $setup0 -body { set a 5 set b 7 set c 3 set d 2 set e 1 set expr {[set b]} apply [list {} [template a !b* # &expr> c ! {` )} d e { set a @a@ set b !b* set c @c@ set d `d) expr {$a + &expr> + $c + $d + `e)} }]] } -cleanup [cleanup1] -result 18 test to_hex {} -setup $setup0 -body { set string one\x1a\x01\0 to hex string lappend res $string set string 词典 catch {to hex string} cres lappend res [lindex $cres 0] return $res } -cleanup [cleanup1] -result [sl { 6f6e651a0100 {character larger than 1 byte} }] test trim {} -setup $setup0 -body { set var "\0 one two three \t\n" trim var return $var } -cleanup [cleanup1] -result {one two three} test valid {} -setup $setup0 -body { set value banana lappend res [valid utf-8 value] set value [binary format c 239] set res1 [valid utf-8 value] lappend res $res1 set value [binary format ccc 239 188 129] set res1 [valid utf-8 value] lappend res $res1 return $res } -cleanup [cleanup1] -result [sl { 1 0 1 }] test validate {} -setup $setup0 -body { set value banana lappend res [validate utf-8 value] set value hello[binary format c 239]goodbye lassign [validate utf-8 value] index lappend res $index set value [binary format ccc 239 188 129] set res1 [validate utf-8 value] lappend res $res1 return $res } -cleanup [cleanup1] -result [sl { 0 5 0 }] cleanupTests } variable cleanup1 { apply {{} { foreach name {res} { catch {unset $name} } }} }