ycl: Artifact [49d3e0c32b]

Artifact 49d3e0c32b514ed8b8e5da5edd7d509957887137:

File packages/string/lib/string.tcl — part of check-in [1c252a3cd0] at 2020-06-22 00:31:53 on branch trunk — chan new routines
carve

interpolate

keep
further development

best working version so far

notes
new routines

absorb addrspaceex

"eval" option to [ls]

"message" design for interactive output

add zip processing to [fin]

sqlite
lossless

work in progress

add test

string
printable

new interface

tree
much faster [last] and [pivot] routines
(user: pooryorick size: 19557)
#! /bin/env tclsh

package require {ycl proc}
[yclprefix] proc alias alias [yclprefix] proc alias
alias aliases [yclprefix] proc aliases

package require {ycl ns join}
alias ns [yclprefix] ns


alias [ns join {} tcl mathfunc max]
alias [ns join {} tcl mathfunc min]
alias [ns join {} tcl mathop]
alias [ns join {} tcl mathop -]
alias [ns join {} tcl mathop +]
alias [ns join {} tcl mathop /]
alias [ns join {} mathop <<]


alias [ns join [yclprefix] proc checkargs]
alias [ns join [yclprefix] proc stub]

aliases {
	{ycl list} {
		consume
		join
		ldedent dedent
	}
	{ycl math} {
		expr
	}

	{ycl var} {
		$
	}
}

package require {ycl ns join}
alias nsjoin [yclprefix] ns join

alias regsub_ [ns join {} regsub]
alias expr_ [ns join {} expr]
alias split_ [ns join {} split]
alias string_ [ns join {} string]


proc asnumeric value {
    set count [scan $value %lld result]
    if {$count} {
        return $result
    }
    set count [scan $value %llf result]
    if {$count} {
        return $result
    }
    return [scan $value %llx]
}


proc cat {resultname args} {
	upvar 1 $resultname result
	set res {}
	consume arg args {
		append res $arg[set arg {}]
	}
	set result $res[set res {}]
	return 
}


variable [nsjoin doc cmp] {
	description {
		compare two strings, returning the index at which they differ, or -1.
	}
}
proc cmp {str1 str2} {
	set start 0
	length str1 len1
	expr last1 {$len1 -1}
	length str2 len2
	expr last2 {$len2 -1}
	if {$last1 > $last2} {
		set last1 $last2[set last2 $last1; lindex {}]
	}
	set end [/ $last1 2]
	while 1 {
		if {$start > $last1} {
			if {$last2 > $last1} {
				if {$last1 == -1} {
					return 0
				} else {
					return [+ $start 1]
				}
			} else {
				return -1
			}
		}
		set str1range $str1
		range str1range $start $end
		set str2range $str2
		range str2range $start $end 
		if {$str1range eq $str2range} {
			expr start {$end + 1}
			set end [+ $end [max [/ [- $last1 $end] 2] 1]]
		} else {
			if {$start == $end} {
				return $start
			}
			set end [- $end [max [/ [- $end $start] 2] 1]]
		}
	}
	length str1 len1
	length str2 len2
	set max [- [min $len1 $len2] 1]
	return $end
}


proc dedent textname {
	upvar $textname text
	split text \n
	ldedent text
	join text \n
	return
}


variable [nsjoin doc delimit] {
	description {
		split input based on delimiters, which are themselves included in the
		output.  Earlier matches take precedence, and where there is a tie for
		position, shorter matches take precedence.  With re delimiters, the ^
		anchors the pattern to each index of $input that this function to for
		the next possible match.
	}
	args {
		input {
			description { what to delimit }
		}
		into {
			description {
				maximum number of pieces to chop input into. -1 means as many as
				possible.  This is a count of the delimited pieces, not the
				delimiters.

			}
			default {
				return -level 0 -1
			}
		}
		string {
			description {
				a string delimiter
			}
			default {}
			count -1
			process {
				lappend delimiters string $string
			}
		}
		match {
			description {
				a [string match]-style delimiter
			}
			default {}
			count -1
			process {
				lappend delimiters match $match
			}

		}
		re {
			description {
				a [regexp]-style delimiter
			}
			default {}
			count -1
			process {
				lappend delimiters re $re
			}
		}
		format {
			description {
				a list of specifiers what to return.  Valid specifiers are...

				info {
					return results as a dictionary which includes extended information 
				}
				strings {
					return strings
				}
				indexes {
					return indexes
				}
				count {
					return a count of matches
				}
			}
			default {
				set format [list indexes]
			}
			constrain {
				[all $format in $formats]
			}

		}
	}
}
stub delimit {input args} {
	package require {ycl list}
	interp alias {} [nsjoin [namespace current] all] {} [yclprefix] list all
} {
	set res [list]
	set delimiters [list]
	set formats [list [STRINGS] [INDEXES] [INFO] [COUNT]]
	checkargs [$ doc delimit] {*}$args
	set count 0
	length input
	set previous -1
	set add [list apply [list {first last type spec} {
		upvar count count
		upvar format format
		upvar strings strings
		upvar input input
		upvar res res
		if {[INFO] in $format} {
			set res2 [dict create first $first last $last type $type spec $spec]
			if {[STRINGS] in $format} {
				dict update res2 string string {
					set string $input
					range string $first $last
				}
			}
			lappend res $res2
		} else {
			if {[STRINGS] in $format} {
				set range $input
				lappend res [range range $first $last]
			} elseif {[COUNT] in $format} {
				set res $count
			} else {
				lappend res [list $first $last]
			}
		}
	} [namespace current]]]
	for {set i 0} {$i<$len} {incr i} {
		set matches [list]
		foreach {type spec} $delimiters {
			set match [list $type $spec] 
			switch -- $type {
				string {
					if {[string first $spec $input $i] == $i} {
						length spec slen
						expr match1 {$i + $slen - 1}
						lappend match $i $match1 
						lappend matches $match 
					}
				}
				match {
					set range $input
					if {[set shortmatch [shortmatch $spec [range range $i end]]] > 0} {
						expr last {$i + $shortmatch}
						lappend match $i $last
						lappend matches $match
					} 
				}
				re {
					#don't use -start switch here because semantics are different for ^
					set range $input
					set rematch [regexp -inline -indices $spec [range range $i end]]
					if {[llength $rematch]} {
						if {[llength [lindex $rematch 0]] != 1} {
							#discard submatches 
							lassign $rematch[set rematch {}] rematch
						}
						lassign $rematch first last
						expr first {$first + $i}
						expr last {$last + $i}
						lappend match $first $last
						lappend matches $match
					}
				}
			}
		}
		if {[llength $matches]} {
			incr count
			set matches [lsort -integer -index 2 $matches]
			lassign $matches match match2
			lassign $match type spec first last
			lassign $match2 type2 spec2 first2 last2
			if {$first == $first2 && $last2 < $last} {
				lassign $match2 type spec first last
			}
			if {$first > $previous + 1} {
				expr first1 {$previous+1}
				expr last1 {$first - 1}
				{*}$add $first1 $last1 unmatched {}
			}
			{*}$add $first $last $type $spec
			set match [list]
			set match2 [list]
			set previous $last
			set i $last
		}
		#$into-1 to count partitioned instead of partitions
		if {$into > -1 && $count >= $into-1 } {
			break
		}
	}
	if {$previous < $len - 1} {
		expr first1 {$previous + 1}
		expr last1 {$len - 1}
		{*}$add $first1 $last1 unmatched {}
	}
	return $res
}


proc doublequote varname {
	upvar $varname value
	regsub value -all {[\\\$\[\"]} {\\\0}
	set value \"$value[set value {}]\"
	return
}


apply [list {} {
	foreach {name directions verb verbed} {
		encode {convertto convertfrom} encode encoded
		decode {convertfrom convertto} decode decoded
	}  {
		variable [nsjoin doc @name@] {
			description
				like [encoding @name@@]
					but
						if a character can't be @verbed@
							returns an error

		}
		lassign $directions convertto convertfrom
		proc $name args [
			string map [list \
				@convertto@ [list $convertto] \
				@convertfrom@ [list $convertfrom] \
				@do@ $verb
			] {
				if {[llength $args] == 1} {
					lassign $args varname
					set encoding [encoding system]
				} else {
					lassign $args varname encoding
				}
				upvar 1 $varname string
				set new [encoding @convertto@ $encoding $string]
				set compare [encoding @convertfrom@ $encoding $new]
				if {$compare eq $string} {
					set string $new[set new {}]
				} else {
					if {[info exists string]} {
						unset string
					}
				}
				return
			}
		]
	}

} [namespace current]]


proc index args {
	switch [llength $args] {
		1 {
			set idx [lindex $args 0]
			set stringname string
			set resname char
		}
		2 {
			lassign $args stringname idx
			set resname char
		}
		3 {
			lassign $args stringname idx resname
		}
		default {
			error [list {wrong number args}]
		}
	}
	upvar $stringname string $resname res
	set res [string index $string $idx]
}

variable [nsjoin doc isdecimal] {
	description
		if $value is a decimal number
			returns $value
				with any surrounding whitespace removed
			otherwise
				returns the empty string

		a leading zero
			does not signify octal

			is interpreted as part of a decimal representation
}
proc isdecimal varname {
	upvar $varname value
	trim value
    if {
        [string is double -strict $value]
        && (
            ![string is entier $value]
            ||
            ![regexp {^\s*[+-]*?0[bBoOxX]?} $value]
        )
    } {
		# all numbers with a mantissa recognized here since only decimal
		# representations are allowed.
        return $value
    }

    # account for 0-padded decimal integers
    regsub value {^([+-])*0*([^[:space:]]*)$} {\1\2}
    if {[string is double $value]} {
        return $value
    }
	set value {}
}

proc isdict value {
	expr_ {![catch {dict size $value}]}
}


variable [ns join doc isnumeric] {
	description
		if $value is numeric
			returns $value
				with any surrounding whitespace removed
			otherwise
				returns the empty string

		a leading zero
			does not signify octal

			is interpreted as part of a decimal representation
}
proc isnumeric value {
    trim value
    # Use [string is double] to accept Inf and NaN
    if {[string is double $value]} {
        return $value
    }
    regsub value {^\s*([+-])*0[BOXbox]?0*([^[:space:]]*)\s*$} {\1\2}
    if {[string is double $value]} {
        return $value
    }
    return {} 
}


stub iter string {
	aliases {
		{ycl coro call} {
			autocall
			body
			hi
			reply
		}
	}
} {
	proc iter string {
		set name [nsjoin [namespace current] [info cmdcount]]
		set res [coroutine $name\0 apply [list string [body {
			length
			hi
			for {set i 0} {$i < $len} {incr i} {
				index string $i char
				reply $char
			}
		}] [namespace current]] $string]
		autocall $name
	}
	tailcall iter $string
}


proc length args {
	switch [llength $args] {
		0 {
			set stringname string
			set lenname len
		}
		1 {
			lassign $args stringname
			set lenname len
		} 
		2 {
			lassign $args stringname lenname
		}
	}
	upvar 1 $stringname string $lenname len
	set len [string_ length $string]
	return
}


proc prepend {stringname new} {
	upvar 1 $stringname string
	set string $new$string[set string {}] 
	return
}


variable [ns join doc range] {
	description
		like [string range]

			but in-place
}
proc range {stringname args} {
	upvar $stringname string
	set string [string_ range $string[set string {}] {*}$args]
	return $string
}


proc regsplit {exprs textvar} {
	upvar 1 $textvar text
    if {$text eq {}} {
        return $text
    }
    set regexp ((?:(?!$exprs|$).)*)($exprs|$)
    set text [
		lmap {x y z} [regexp -all -inline $regexp $text] {
			list $y $z
	}]
	join text
	if {[lindex $text end] eq {}} {
		#remove the last empty string that represents failure to find a
		#delimiter
		set text [lreplace $text[set text {}] end end]
	} else {
		# the regular expression doesn't detect the empty string after the
		# delimiter that ends the text
		lappend text {}
	}
	return
}


proc regsub {stringname args} {
	if {[llength $args] < 2} {
		error [list {wrong # args}]
	}
	upvar 1 $stringname string
	set subspec [lindex $args end]
	set expr [lindex $args end-1]
	regsub_ {*}[lrange $args 0 end-2] $expr $string[set string {}] $subspec string
	return
}


proc replace {stringname args} {
	upvar $stringname string
	set string [string replace $string[set string {}] {*}$args]
}


proc requiredecimal varname {
	upvar $varname value
	set newval $value
	isdecimal newval
	if {$newval eq {}} {
		error [list {not a decimal number} $value]
	}
	set value $newval
}


proc reverse stringname {
	upvar $stringname string
	set string [string reverse $string[set string {}]]
}


variable [ns join doc shortmatch] {
	description {
		same as [string match], but return -1 if $string doesn't match, and the
		index of the last char of the shortest match if it does
	}
}
proc shortmatch args {
	set string [lindex $args end]
	set args [lrange $args 0 end-1] 
	length
	expr incr {$len / 2}
	expr last {$len -1}
	set match -1
	set break 0
	while 1 {
		set range $string
		if {[string match {*}$args [range range 0 $last]]} {
			if {$match == $last} {
				break
			}
			set match $last
			expr last {$last - $incr}
			expr incr {max($incr / 2,1)}
		} else {
			if {$incr == 0 || $last >= $len} {
				break
			}
			expr last {$last + $incr}
		}
	}
	return $match 
}


variable [ns join doc split] {
	description {
		like the builtin [split]
			but
				takes the name of the variable that refers to the string

				stores the result in that variable
	}
}
proc split {varname args} {
	upvar $varname var
	set var [split_ $var[set var {}] {*}$args]
	return
}


variable [ns join doc splitalnum] {
	description {
		splits the value of the given variable into the alpha-numeric,
		whitespace, punctuation, and control strings that comprise the value

		stores the result in the given variable
	}
}
proc splitalnum varname {
	upvar $varname var
	splitpattern var {
		{^[[:space:]]*([[:alnum:]]+)[[:space:]]*}
		{^[[:space:]]*([[:punct:]]+)[[:space:]]*}
		{^[[:space:]]*([[:cntrl:]]+)[[:space:]]*}
		{^[[:space:]]*([^[:alnum:][:space:][:punct:][:cntrl:]]+)[[:space:]]*}
	}
}

variable [ns join doc splitalpha] {
	description {
		splits the value of the given variable into the alpha, numeric,
		whitespace, punctuation, and control strings that comprise the value

		stores the result in the given variable
	}
}
proc splitalpha varname {
	upvar $varname var
	splitpattern var {
		{^[[:space:]]*([[:alpha:]]+)[[:space:]]*}
		{^[[:space:]]*([[:digit:]]+)[[:space:]]*}
		{^[[:space:]]*([[:punct:]]+)[[:space:]]*}
		{^[[:space:]]*([[:cntrl:]]+)[[:space:]]*}
		{^[[:space:]]*([^[:alpha:][:space:][:digit:][:punct:][:cntrl:]])[[:space:]]*+}
	}
}

proc splitpattern {varname patterns} {
	upvar $varname var
	set res {}
	while {[string length $var] > 0} {
		set len [llength $res]
		# more specific expressions must occur before more general expressions
		foreach pattern $patterns {
			if {[regexp $pattern $var whole part]} {
				puts [list plackle $part]
				lappend res $part
				set var [string range $var[set var {}] [
					string length $whole] end] 
				break
			}
		}
		# given the expressions above, this should be impossible to reach
		if {[llength $res] == $len} {
			error [list {no match} $var]
		}
	}
	set var $res
}


namespace ensemble create -command to -map {
	hex to_hex
}


proc to_hex varname {
	upvar 1 $varname string
	length
	set res {}
	for {set i 0} {$i < $len} {incr i} {
		# given the current implementation of Tcl strings this is expensive 
		# but implement it this way anyway to minimize storage
		#    assuming that the implementation will improve 
		index 0 
		scan $char %c cardinal
		if {$cardinal > 255} {
			error [list {character larger than 1 byte} index $i]
		}
		append res [format %02x $cardinal]
		set string [string range $string 1 end]
	}
	set string $res
	return
}


variable [ns join doc template] {
	synopsis {
		template  ?varspec or directive ...?  string
	}
	description [

		{
			a concise way to invoke [string map]
			
			unless otherwise specified

				each mapped values is encoded as a list containing one item
			
			each $varspec
				is
					the name

						contains only characters valid in unbraced $
						substiution

						of a variable to substitute

				optionally preceded by a delimiter that ends with a character
				that isn't valid in unbraced $ substitution

				optionally followed by a delimiter that begins with a character
				which isn't valid in unbraced $ substituion
				
		}

		directives {
			# {
				The subsequent $varspec is not to be quoted with [list] .
			}

			= {
				The subsequent $varspec is a list
				containing the $odelim and optionally the $cdelim for the $varspec that
				follows it . If only $odelim is provided , $cdelim shares its value .
			}

			! {
				A $varspec value of  "!" is like "=" , but its effects remain in place
				for the all the following $varspec values .
			}
		}


	]
}
proc template args [string map [list @varchars@ {a-zA-z0-9_}] {
	if {![llength $args]} {
		error [list {wrong # args} [llength $args]]
	}
	set string [lindex $args end]
	set args [lrange $args[set args {}] 0 end-1]
	set script "[ns join {} string] map \[list "
	set state {}
	set changedefaultdelim 0
	set changedelim 0
	set list 1
	set odefault @
	set cdefault @
	set mode varspec
	foreach arg $args {
		switch $state {
			{} {
				switch $arg {
					= {
						set state changedelim
						set mode varname
						continue
					}
					! {
						set state changedefaultdelim
						set mode varspec
						continue
					}
					\# {
						set list 0
						continue
					}
				}
			}
			changedelim {
				set arg [lassign $arg[set arg {}] odelim]
				if {[llength $arg]} {
					lassign $arg cdelim
				}
				set changedelim 1
				set state {}
				continue
			}
			changedefaultdelim {
				set arg [lassign $arg[set arg {}] odefault]
				if {[llength $arg]} {
					lassign $arg cdefault
				} else {
					set cdefault $odefault
				}
				set changedefaultdelim 0
				set state {}
				continue
			}
			default {
				error [list {unknown state} $state]
			}
		}

		if {!$changedelim} {
			set odelim $odefault 
			set cdelim $cdefault
		}
		if {$mode eq {varspec}} {
			if {![regexp "^(.*\[^@varchars@])?(\[@varchars@]+|\[@varchars@]*\\(\[^\\)]*\\))(\[^a-zA-z0-9_].*)?$" \
				$arg -> odelim1 varname cdelim1]} {

				error [list {bad varspec} $arg]
			}
			if {$odelim1 ne {}} {
				set odelim $odelim1
			}
			if {$cdelim1 ne {}} {
				set cdelim $cdelim1
			}
		} else {
			set varname $arg
		}
		if {$list} {
			set qvarname "\[list \$$varname]"
	 	} else {
			set qvarname \$$varname
		}
		append script "$odelim$varname$cdelim $qvarname "
		set list 1
		set changedefaultdelim 0
		set changedelim 0
		set mode varspec
	}
	append script {] } [list $string]
	uplevel 1 $script 
}]


proc tolower name {
	upvar $name string
	set string [string tolower $string[set string {}]]
}


proc trim {varname args} {
	upvar $varname var
	set var [string_ trim $var[set var {}] {*}$args]
	return
}


proc valid {spec valuename} {
	upvar 1 $valuename value
	if {$spec in [encoding names]} {
		set decoded $value
		decode decoded $spec
		if {[info exists decoded]} {
			return 1
		} else {
			return 0
		}
	} else {
		switch $spec {
			default {
				error [list {unknown specification} $spec]
			}
		}
	}
}


proc validate {spec valuename} {
	upvar 1 $valuename string
	if {$spec in [encoding names]} {
		set decoded $string
		decode decoded $spec
		if {![info exists decoded]} {
			length
			for {set i 0} {$i < $len} {incr i} {
				index string $i string2
				decode string2 $spec
				if {![info exists string2]} {
					return $i
				}
			}
		} else {
			return 0
		}
	} else {
		switch $spec {
			default {
				error [list {unknown specification} $spec]
			}
		}
	}
}


namespace ensemble create -command validator -map {
	activate validator_activate
}


proc validator_activate validator {
	if {$validator in [encoding names]} {
		return
	} else {
		switch $validator {
			error [list {unknown validator} $validator]
		}
	}
}


namespace ensemble create -command validators -map {
	known validators_known
	ready validators_ready
}

proc validators_known {} {
	encoding names
}


proc validators_ready {} {
	encoding names
}


proc COUNT {} {return count}
proc INDEXES {} {return indexes}
proc INFO {} {return info}
proc STRINGS {} {return strings}