Artifact 49d3e0c32b514ed8b8e5da5edd7d509957887137:
- File
packages/string/lib/string.tcl
— part of check-in
[1c252a3cd0]
at
2020-06-22 00:31:53
on branch trunk
— chan
new routines
carve
interpolate
keep
further development
best working version so far
notes
new routines
absorb addrspaceex
"eval" option to [ls]
"message" design for interactive output
add zip processing to [fin]
sqlite
lossless
work in progress
add test
string
printable
new interface
tree
much faster [last] and [pivot] routines
(user: pooryorick size: 19557)
#! /bin/env tclsh package require {ycl proc} [yclprefix] proc alias alias [yclprefix] proc alias alias aliases [yclprefix] proc aliases package require {ycl ns join} alias ns [yclprefix] ns alias [ns join {} tcl mathfunc max] alias [ns join {} tcl mathfunc min] alias [ns join {} tcl mathop] alias [ns join {} tcl mathop -] alias [ns join {} tcl mathop +] alias [ns join {} tcl mathop /] alias [ns join {} mathop <<] alias [ns join [yclprefix] proc checkargs] alias [ns join [yclprefix] proc stub] aliases { {ycl list} { consume join ldedent dedent } {ycl math} { expr } {ycl var} { $ } } package require {ycl ns join} alias nsjoin [yclprefix] ns join alias regsub_ [ns join {} regsub] alias expr_ [ns join {} expr] alias split_ [ns join {} split] alias string_ [ns join {} string] proc asnumeric value { set count [scan $value %lld result] if {$count} { return $result } set count [scan $value %llf result] if {$count} { return $result } return [scan $value %llx] } proc cat {resultname args} { upvar 1 $resultname result set res {} consume arg args { append res $arg[set arg {}] } set result $res[set res {}] return } variable [nsjoin doc cmp] { description { compare two strings, returning the index at which they differ, or -1. } } proc cmp {str1 str2} { set start 0 length str1 len1 expr last1 {$len1 -1} length str2 len2 expr last2 {$len2 -1} if {$last1 > $last2} { set last1 $last2[set last2 $last1; lindex {}] } set end [/ $last1 2] while 1 { if {$start > $last1} { if {$last2 > $last1} { if {$last1 == -1} { return 0 } else { return [+ $start 1] } } else { return -1 } } set str1range $str1 range str1range $start $end set str2range $str2 range str2range $start $end if {$str1range eq $str2range} { expr start {$end + 1} set end [+ $end [max [/ [- $last1 $end] 2] 1]] } else { if {$start == $end} { return $start } set end [- $end [max [/ [- $end $start] 2] 1]] } } length str1 len1 length str2 len2 set max [- [min $len1 $len2] 1] return $end } proc dedent textname { upvar $textname text split text \n ldedent text join text \n return } variable [nsjoin doc delimit] { description { split input based on delimiters, which are themselves included in the output. Earlier matches take precedence, and where there is a tie for position, shorter matches take precedence. With re delimiters, the ^ anchors the pattern to each index of $input that this function to for the next possible match. } args { input { description { what to delimit } } into { description { maximum number of pieces to chop input into. -1 means as many as possible. This is a count of the delimited pieces, not the delimiters. } default { return -level 0 -1 } } string { description { a string delimiter } default {} count -1 process { lappend delimiters string $string } } match { description { a [string match]-style delimiter } default {} count -1 process { lappend delimiters match $match } } re { description { a [regexp]-style delimiter } default {} count -1 process { lappend delimiters re $re } } format { description { a list of specifiers what to return. Valid specifiers are... info { return results as a dictionary which includes extended information } strings { return strings } indexes { return indexes } count { return a count of matches } } default { set format [list indexes] } constrain { [all $format in $formats] } } } } stub delimit {input args} { package require {ycl list} interp alias {} [nsjoin [namespace current] all] {} [yclprefix] list all } { set res [list] set delimiters [list] set formats [list [STRINGS] [INDEXES] [INFO] [COUNT]] checkargs [$ doc delimit] {*}$args set count 0 length input set previous -1 set add [list apply [list {first last type spec} { upvar count count upvar format format upvar strings strings upvar input input upvar res res if {[INFO] in $format} { set res2 [dict create first $first last $last type $type spec $spec] if {[STRINGS] in $format} { dict update res2 string string { set string $input range string $first $last } } lappend res $res2 } else { if {[STRINGS] in $format} { set range $input lappend res [range range $first $last] } elseif {[COUNT] in $format} { set res $count } else { lappend res [list $first $last] } } } [namespace current]]] for {set i 0} {$i<$len} {incr i} { set matches [list] foreach {type spec} $delimiters { set match [list $type $spec] switch -- $type { string { if {[string first $spec $input $i] == $i} { length spec slen expr match1 {$i + $slen - 1} lappend match $i $match1 lappend matches $match } } match { set range $input if {[set shortmatch [shortmatch $spec [range range $i end]]] > 0} { expr last {$i + $shortmatch} lappend match $i $last lappend matches $match } } re { #don't use -start switch here because semantics are different for ^ set range $input set rematch [regexp -inline -indices $spec [range range $i end]] if {[llength $rematch]} { if {[llength [lindex $rematch 0]] != 1} { #discard submatches lassign $rematch[set rematch {}] rematch } lassign $rematch first last expr first {$first + $i} expr last {$last + $i} lappend match $first $last lappend matches $match } } } } if {[llength $matches]} { incr count set matches [lsort -integer -index 2 $matches] lassign $matches match match2 lassign $match type spec first last lassign $match2 type2 spec2 first2 last2 if {$first == $first2 && $last2 < $last} { lassign $match2 type spec first last } if {$first > $previous + 1} { expr first1 {$previous+1} expr last1 {$first - 1} {*}$add $first1 $last1 unmatched {} } {*}$add $first $last $type $spec set match [list] set match2 [list] set previous $last set i $last } #$into-1 to count partitioned instead of partitions if {$into > -1 && $count >= $into-1 } { break } } if {$previous < $len - 1} { expr first1 {$previous + 1} expr last1 {$len - 1} {*}$add $first1 $last1 unmatched {} } return $res } proc doublequote varname { upvar $varname value regsub value -all {[\\\$\[\"]} {\\\0} set value \"$value[set value {}]\" return } apply [list {} { foreach {name directions verb verbed} { encode {convertto convertfrom} encode encoded decode {convertfrom convertto} decode decoded } { variable [nsjoin doc @name@] { description like [encoding @name@@] but if a character can't be @verbed@ returns an error } lassign $directions convertto convertfrom proc $name args [ string map [list \ @convertto@ [list $convertto] \ @convertfrom@ [list $convertfrom] \ @do@ $verb ] { if {[llength $args] == 1} { lassign $args varname set encoding [encoding system] } else { lassign $args varname encoding } upvar 1 $varname string set new [encoding @convertto@ $encoding $string] set compare [encoding @convertfrom@ $encoding $new] if {$compare eq $string} { set string $new[set new {}] } else { if {[info exists string]} { unset string } } return } ] } } [namespace current]] proc index args { switch [llength $args] { 1 { set idx [lindex $args 0] set stringname string set resname char } 2 { lassign $args stringname idx set resname char } 3 { lassign $args stringname idx resname } default { error [list {wrong number args}] } } upvar $stringname string $resname res set res [string index $string $idx] } variable [nsjoin doc isdecimal] { description if $value is a decimal number returns $value with any surrounding whitespace removed otherwise returns the empty string a leading zero does not signify octal is interpreted as part of a decimal representation } proc isdecimal varname { upvar $varname value trim value if { [string is double -strict $value] && ( ![string is entier $value] || ![regexp {^\s*[+-]*?0[bBoOxX]?} $value] ) } { # all numbers with a mantissa recognized here since only decimal # representations are allowed. return $value } # account for 0-padded decimal integers regsub value {^([+-])*0*([^[:space:]]*)$} {\1\2} if {[string is double $value]} { return $value } set value {} } proc isdict value { expr_ {![catch {dict size $value}]} } variable [ns join doc isnumeric] { description if $value is numeric returns $value with any surrounding whitespace removed otherwise returns the empty string a leading zero does not signify octal is interpreted as part of a decimal representation } proc isnumeric value { trim value # Use [string is double] to accept Inf and NaN if {[string is double $value]} { return $value } regsub value {^\s*([+-])*0[BOXbox]?0*([^[:space:]]*)\s*$} {\1\2} if {[string is double $value]} { return $value } return {} } stub iter string { aliases { {ycl coro call} { autocall body hi reply } } } { proc iter string { set name [nsjoin [namespace current] [info cmdcount]] set res [coroutine $name\0 apply [list string [body { length hi for {set i 0} {$i < $len} {incr i} { index string $i char reply $char } }] [namespace current]] $string] autocall $name } tailcall iter $string } proc length args { switch [llength $args] { 0 { set stringname string set lenname len } 1 { lassign $args stringname set lenname len } 2 { lassign $args stringname lenname } } upvar 1 $stringname string $lenname len set len [string_ length $string] return } proc prepend {stringname new} { upvar 1 $stringname string set string $new$string[set string {}] return } variable [ns join doc range] { description like [string range] but in-place } proc range {stringname args} { upvar $stringname string set string [string_ range $string[set string {}] {*}$args] return $string } proc regsplit {exprs textvar} { upvar 1 $textvar text if {$text eq {}} { return $text } set regexp ((?:(?!$exprs|$).)*)($exprs|$) set text [ lmap {x y z} [regexp -all -inline $regexp $text] { list $y $z }] join text if {[lindex $text end] eq {}} { #remove the last empty string that represents failure to find a #delimiter set text [lreplace $text[set text {}] end end] } else { # the regular expression doesn't detect the empty string after the # delimiter that ends the text lappend text {} } return } proc regsub {stringname args} { if {[llength $args] < 2} { error [list {wrong # args}] } upvar 1 $stringname string set subspec [lindex $args end] set expr [lindex $args end-1] regsub_ {*}[lrange $args 0 end-2] $expr $string[set string {}] $subspec string return } proc replace {stringname args} { upvar $stringname string set string [string replace $string[set string {}] {*}$args] } proc requiredecimal varname { upvar $varname value set newval $value isdecimal newval if {$newval eq {}} { error [list {not a decimal number} $value] } set value $newval } proc reverse stringname { upvar $stringname string set string [string reverse $string[set string {}]] } variable [ns join doc shortmatch] { description { same as [string match], but return -1 if $string doesn't match, and the index of the last char of the shortest match if it does } } proc shortmatch args { set string [lindex $args end] set args [lrange $args 0 end-1] length expr incr {$len / 2} expr last {$len -1} set match -1 set break 0 while 1 { set range $string if {[string match {*}$args [range range 0 $last]]} { if {$match == $last} { break } set match $last expr last {$last - $incr} expr incr {max($incr / 2,1)} } else { if {$incr == 0 || $last >= $len} { break } expr last {$last + $incr} } } return $match } variable [ns join doc split] { description { like the builtin [split] but takes the name of the variable that refers to the string stores the result in that variable } } proc split {varname args} { upvar $varname var set var [split_ $var[set var {}] {*}$args] return } variable [ns join doc splitalnum] { description { splits the value of the given variable into the alpha-numeric, whitespace, punctuation, and control strings that comprise the value stores the result in the given variable } } proc splitalnum varname { upvar $varname var splitpattern var { {^[[:space:]]*([[:alnum:]]+)[[:space:]]*} {^[[:space:]]*([[:punct:]]+)[[:space:]]*} {^[[:space:]]*([[:cntrl:]]+)[[:space:]]*} {^[[:space:]]*([^[:alnum:][:space:][:punct:][:cntrl:]]+)[[:space:]]*} } } variable [ns join doc splitalpha] { description { splits the value of the given variable into the alpha, numeric, whitespace, punctuation, and control strings that comprise the value stores the result in the given variable } } proc splitalpha varname { upvar $varname var splitpattern var { {^[[:space:]]*([[:alpha:]]+)[[:space:]]*} {^[[:space:]]*([[:digit:]]+)[[:space:]]*} {^[[:space:]]*([[:punct:]]+)[[:space:]]*} {^[[:space:]]*([[:cntrl:]]+)[[:space:]]*} {^[[:space:]]*([^[:alpha:][:space:][:digit:][:punct:][:cntrl:]])[[:space:]]*+} } } proc splitpattern {varname patterns} { upvar $varname var set res {} while {[string length $var] > 0} { set len [llength $res] # more specific expressions must occur before more general expressions foreach pattern $patterns { if {[regexp $pattern $var whole part]} { puts [list plackle $part] lappend res $part set var [string range $var[set var {}] [ string length $whole] end] break } } # given the expressions above, this should be impossible to reach if {[llength $res] == $len} { error [list {no match} $var] } } set var $res } namespace ensemble create -command to -map { hex to_hex } proc to_hex varname { upvar 1 $varname string length set res {} for {set i 0} {$i < $len} {incr i} { # given the current implementation of Tcl strings this is expensive # but implement it this way anyway to minimize storage # assuming that the implementation will improve index 0 scan $char %c cardinal if {$cardinal > 255} { error [list {character larger than 1 byte} index $i] } append res [format %02x $cardinal] set string [string range $string 1 end] } set string $res return } variable [ns join doc template] { synopsis { template ?varspec or directive ...? string } description [ { a concise way to invoke [string map] unless otherwise specified each mapped values is encoded as a list containing one item each $varspec is the name contains only characters valid in unbraced $ substiution of a variable to substitute optionally preceded by a delimiter that ends with a character that isn't valid in unbraced $ substitution optionally followed by a delimiter that begins with a character which isn't valid in unbraced $ substituion } directives { # { The subsequent $varspec is not to be quoted with [list] . } = { The subsequent $varspec is a list containing the $odelim and optionally the $cdelim for the $varspec that follows it . If only $odelim is provided , $cdelim shares its value . } ! { A $varspec value of "!" is like "=" , but its effects remain in place for the all the following $varspec values . } } ] } proc template args [string map [list @varchars@ {a-zA-z0-9_}] { if {![llength $args]} { error [list {wrong # args} [llength $args]] } set string [lindex $args end] set args [lrange $args[set args {}] 0 end-1] set script "[ns join {} string] map \[list " set state {} set changedefaultdelim 0 set changedelim 0 set list 1 set odefault @ set cdefault @ set mode varspec foreach arg $args { switch $state { {} { switch $arg { = { set state changedelim set mode varname continue } ! { set state changedefaultdelim set mode varspec continue } \# { set list 0 continue } } } changedelim { set arg [lassign $arg[set arg {}] odelim] if {[llength $arg]} { lassign $arg cdelim } set changedelim 1 set state {} continue } changedefaultdelim { set arg [lassign $arg[set arg {}] odefault] if {[llength $arg]} { lassign $arg cdefault } else { set cdefault $odefault } set changedefaultdelim 0 set state {} continue } default { error [list {unknown state} $state] } } if {!$changedelim} { set odelim $odefault set cdelim $cdefault } if {$mode eq {varspec}} { if {![regexp "^(.*\[^@varchars@])?(\[@varchars@]+|\[@varchars@]*\\(\[^\\)]*\\))(\[^a-zA-z0-9_].*)?$" \ $arg -> odelim1 varname cdelim1]} { error [list {bad varspec} $arg] } if {$odelim1 ne {}} { set odelim $odelim1 } if {$cdelim1 ne {}} { set cdelim $cdelim1 } } else { set varname $arg } if {$list} { set qvarname "\[list \$$varname]" } else { set qvarname \$$varname } append script "$odelim$varname$cdelim $qvarname " set list 1 set changedefaultdelim 0 set changedelim 0 set mode varspec } append script {] } [list $string] uplevel 1 $script }] proc tolower name { upvar $name string set string [string tolower $string[set string {}]] } proc trim {varname args} { upvar $varname var set var [string_ trim $var[set var {}] {*}$args] return } proc valid {spec valuename} { upvar 1 $valuename value if {$spec in [encoding names]} { set decoded $value decode decoded $spec if {[info exists decoded]} { return 1 } else { return 0 } } else { switch $spec { default { error [list {unknown specification} $spec] } } } } proc validate {spec valuename} { upvar 1 $valuename string if {$spec in [encoding names]} { set decoded $string decode decoded $spec if {![info exists decoded]} { length for {set i 0} {$i < $len} {incr i} { index string $i string2 decode string2 $spec if {![info exists string2]} { return $i } } } else { return 0 } } else { switch $spec { default { error [list {unknown specification} $spec] } } } } namespace ensemble create -command validator -map { activate validator_activate } proc validator_activate validator { if {$validator in [encoding names]} { return } else { switch $validator { error [list {unknown validator} $validator] } } } namespace ensemble create -command validators -map { known validators_known ready validators_ready } proc validators_known {} { encoding names } proc validators_ready {} { encoding names } proc COUNT {} {return count} proc INDEXES {} {return indexes} proc INFO {} {return info} proc STRINGS {} {return strings}