# types.tcl --
#
# Procedures to assign types to the values in a quadcode sequence.
#
# Copyright (c) 2015 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------
interp alias {} quadcode::tcl::mathfunc::istype {} quadcode::dataType::isa
# Data types
namespace eval quadcode::dataType {
namespace export mightbea isa allbut typeIntersect typeUnion
# IMPURE - Any value that has a known internal representation may
# have the IMPURE indicator to show that it has a string
# representation that must e preserved.
variable IMPURE [expr 0x4000000]
# BOTTOM - means an inconsistency. We have contradictory information
# about a given value. Should not happen.
variable BOTTOM 0
# EMPTY - the value is the empty string. It is safe for the representation
# of EMPTY to be void.
variable EMPTY [expr 0x1]
# BOOLWORD - the value is a Boolean word: yes/no, on/off, true/false.
# Constants of this type will always be impure, since the
# word cannot be constructed from the single bit of the
# internal representation.
variable BOOLWORD [expr 0x2]
# CONST0 - the value is the constant 0
# A constant of this type will be impure unless it is the
# literal string '0'.
variable CONST0 [expr 0x4]
# CONST1 - the value is the constant 1
# A constant of this type will be impure unless it is the
# literal string '1'.
variable CONST1 [expr 0x8]
# ZEROONE - the value is 0 or 1.
# A constant of this type will be impure unless it is one
# of the literal strings '0' or '1'.
variable ZEROONE [expr {$CONST0 | $CONST1}]
variable BOOL_INT $ZEROONE
# Boolean - the value is a Boolean constant
# A constant of this type will be impure unless it is one
# of the literal strings '0' or '1'.
variable BOOLEAN [expr {$BOOLWORD | $ZEROONE}]
# OTHERINT32 - the value is a native integer known to be other than 0 or 1
# A constant $x of this type will be pure iff
# {int($x) eq $x} - that is, it is an integer in
# canonical form.
variable OTHERINT32 [expr 0x10]
# INT32 - the value is a native integer
variable INT32 [expr {$ZEROONE | $OTHERINT32}]
# A constant $x of this type will be pure iff
# {int($x) eq $x} - that is, it is an integer in
# canonical form.
# INT64 - the value is a native 64-bit integer
# OTHERINT64 - the value is a native 64-bit integer known to be too
# large to fit in a 32-bit word.
# A constant $x of this type will be pure iff
# {wide($x) eq $x} - that is, it is an integer in
# canonical form.
variable OTHERINT64 [expr 0x20]
variable INT64 [expr {$INT32 | $OTHERINT64}]
variable INT $INT64
# BIGINT - the value is an integer that does not fit into a native integer
# A constant $x of this type will be pure iff
# {entier($x) eq $x} - that is, it is an integer in
# canonical form.
variable BIGINT [expr 0x40]
# ENTIER - the value is an integer; unknown whether it is native
# A constant $x of this type will be pure iff
# {entier($x) eq $x} - that is, it is an integer in
# canonical form.
variable ENTIER [expr {$INT | $BIGINT}]
# DOUBLE - the value is a double-precision floating-point constant
# A constant $x of this type will be pure iff
# {double($x) eq $x} - that is, it is a 'double' in
# canonical form.
variable DOUBLE [expr 0x80]
# NUMERIC - the value is a number of some sort.
# A constant $x of this type will be pure iff either
# {entier($x) eq $x} or {double($x) eq $x} - that is,
# it is a number in canonical form.
variable NUMERIC [expr {$DOUBLE | $ENTIER}]
# FOREACH - the value represents the iterator of a [foreach] or [lmap].
# There are no constants of this type, and it is therefore
# always pure.
variable FOREACH [expr 0x10000]
# DICTITER - the value represents the iterator of a [dict for] or related
# operation.
# There are no constants of this type, and it is therefore
# always pure.
variable DICTITER [expr 0x20000]
# OTHERSTRING - the value is a string that is none of the above.
# This type is always impure, and its internal representation
# may be void because the string representation is the
# only representation.
variable OTHERSTRING [expr 0x8000000]
variable IMPUREOTHERSTRING [expr {$OTHERSTRING | $IMPURE}]
# CALLFRAME - the value represents the state of the callframe.
variable CALLFRAME [expr 0x10000000]
# FAIL - the value has resulted from a failed computation. It represents
# the failure state of the interpreter - likely the return options.
# This type standing alone is always pure, but is almost always
# combined with another type that may be pure or impure
variable FAIL [expr 0x20000000]
# NEXIST - the value does not exist. This is a value akin to NULL
# This type standing alone is always pure, but is almost always
# combined with another type that may be pure or impure
variable NEXIST [expr 0x40000000]
# STRING - the value is an actual value, not a failure nor a missing value
# This type is always impure, and its internal representation
# may be void because the string representation is the
# only representation.
variable STRING [expr {~($CALLFRAME | $FAIL | $NEXIST
| $DICTITER | $FOREACH)}]
# TOP - means no information. We do not know whether a value exists;
# we do not know its type; we do not know whether it resulted from
# an error in a computation. Also should not happen except possibly
# as an initial value in an iterative calculation of types.
variable TOP -1
# isa --
#
# Tests the 'is-a' relationship
#
# Parameters:
# type1, type2 - Type codes
#
# Result:
# Returns 1 if any instance of type1 is an instance of type2.
# Returns 0 otherwise
#
proc isa {type1 type2} {
variable IMPURE
if {$type1 & $IMPURE} {
set type1 [expr {$type1 & ~$IMPURE}]
set type2 [expr {$type2 & ~$IMPURE}]
}
return [expr {! ($type1 & ~$type2) }]
}
# mightbea --
#
# Tests the 'might-be-a' relationship
#
# Parameters:
# type1, type2 - Type codes
#
# Results:
# Returns 1 if some instance of type1 is an instance of type2.
# Returns 0 if there is no intersection between the types.
proc mightbea {type1 type2} {
variable IMPURE
if {$type1 & $IMPURE} {
set type1 [expr {$type1 & ~$IMPURE}]
set type2 [expr {$type2 & ~$IMPURE}]
}
expr {($type1 & $type2) != 0}
}
# allbut --
#
# Complement of a data type
#
# Parameters:
# type - Type code
#
# Results:
# Returns a type code representing that a value is NOT of the given
# type.
proc allbut {type} {
expr {~$type}
}
# typeIntersect --
#
# Intersection of two data types
#
# Parameters:
# type1, type2 - Type codes of the types whose intersection is needed
#
# Results:
# Returns a type code representing the intersection
proc typeIntersect {type1 type2} {
expr {$type1 & $type2}
}
# typeUnion --
#
# Union of two data types
#
# Parameters:
# type1, type2 - Type codes of the types whose intersection is needed
#
# Results:
# Returns a type code representing the intersection
proc typeUnion {type1 type2} {
expr {$type1 | $type2}
}
# existence --
#
# Tests whether an object might exist
#
# Parameters:
# opd - A quadcode operand
#
# Results:
# Returns 'yes', 'no' or 'maybe'
proc existence {types opd} {
variable NEXIST
set type [quadcode::typeOfOperand $types $opd]
if {$type == $NEXIST} {
return no
} elseif {!($type & $NEXIST)} {
return yes
} else {
return maybe
}
}
# success --
#
# Tests whether an object represents a successful result.
#
# Parameters:
# opd - A quadcode operand
#
# Results:
# Returns 'yes', 'no' or 'maybe'
proc success {types opd} {
variable FAIL
set type [quadcode::typeOfOperand $types $opd]
if {$type == $FAIL} {
return no
} elseif {!($type & $FAIL)} {
return yes
} else {
return maybe
}
}
}
# nameOfType --
#
# Determines the name of a type
#
# Parameters:
# type - Numeric representation of a type
#
# Results:
# Returns the name of the type
proc quadcode::nameOfType {type} {
if {$type == 0} {
return NOTHING
}
set result {}
foreach {name wname} {
CALLFRAME CALLFRAME
NEXIST NEXIST
FAIL FAIL
DICTITER DICTITER
FOREACH FOREACH
OTHERSTRING STRING
IMPURE IMPURE
EMPTY EMPTY
} {
namespace upvar dataType $name t
if {$type & $t} {
namespace upvar dataType $wname w
lappend result $wname
set type [expr {$type & ~$w}]
}
}
if {($type & $dataType::ENTIER)
&& ($type & $dataType::DOUBLE)} {
lappend result NUMERIC
set type [expr {$type & ~ $dataType::NUMERIC}]
}
foreach {name wname} {
DOUBLE DOUBLE
BIGINT ENTIER
OTHERINT64 INT
OTHERINT32 INT
ZEROONE ZEROONE
BOOLWORD BOOLEAN
} {
namespace upvar dataType $name t
if {$type & $t} {
namespace upvar dataType $wname w
lappend result $wname
set type [expr {$type & ~$w}]
}
}
return $result
}
# inferTypes --
#
# Performs type inference on quadcode
#
# Results:
# None.
#
# Side effects:
# Stores type and rewritten quadcodes in this object, which should
# have been obtained from the 'variant' method so that a 'clean'
# version of the original is available for other specializations.
oo::define quadcode::transformer method inferTypes {} {
my debug-inferTypes {
puts "Before type inference:"
my dump-bb
}
namespace upvar ::quadcode::dataType BOTTOM BOTTOM FAIL FAIL STRING STRING
# Initialize all types to BOTTOM
set types {}
dict for {v -} $udchain {
dict set types $v $BOTTOM
}
dict set types return $BOTTOM
# Put all basic blocks on the worklist for processing in depth-first
# order
set worklist {}
for {set b [expr {[llength $bbcontent]-1}]} {$b >= 0} {incr b -1} {
lappend worklist $b
}
# Process blocks from the worklist
while {[llength $worklist] > 0} {
set b [lindex $worklist end]
set worklist [lrange $worklist[set worklist {}] 0 end-1]
set content [lindex $bbcontent $b]
# Process instructions in each block from top to bottom
set pc 0
foreach q $content {
switch -exact -- [lindex $q 0] {
return {
dict set types return \
[expr {[dict get $types return]
| [typeOfOperand $types [lindex $q 3]]}]
}
returnException {
dict set types return \
[expr {[dict get $types return] | $FAIL}]
}
default {
set rvar [lindex $q 1]
if {[lindex $rvar 0] in {"var" "temp"}} {
set type [my typeOfResult $q]
if {$type != [dict get $types $rvar]} {
dict set types $rvar $type
if {[dict exists $duchain $rvar]} {
dict for {use -} [dict get $duchain $rvar] {
set idx [lsearch -sorted -integer \
-decreasing -bisect \
$worklist $use]
if {[lindex $worklist $idx] != $use} {
set worklist \
[linsert \
$worklist[set worklist {}]\
[expr {$idx+1}] $use]
}
}
}
}
}
}
}
}
}
my debug-inferTypes {
puts "Types inferred:"
foreach {v type} [lsort -dictionary -index 0 -stride 2 $types] {
puts [format "%s: %#x (%s)" $v: $type [nameOfType $type]]
if {$type < 0} {
puts " ~[nameOfType [expr {~$type}]]"
}
}
}
}
# typeOfResult --
#
# Computes the type of the result of an operation
#
# Parameters:
# q - A single three address instruction
#
# Results:
# Returns the deduced data type of q's left hand side
oo::define quadcode::transformer method typeOfResult {q} {
namespace upvar ::quadcode::dataType {*}{
DOUBLE DOUBLE INT INT STRING STRING FAIL FAIL EMPTY EMPTY
BOOL_INT BOOL ENTIER ENTIER NUMERIC NUMERIC IMPURE IMPURE
VOID VOID CALLFRAME CALLFRAME DICTITER DICTITER FOREACH FOREACH
NEXIST NEXIST
}
switch -exact -- [lindex $q 0 0] {
debug-value {
return [typeOfOperand $types [lindex $q 3]]
}
widenTo {
return [lindex $q 0 1]
}
narrowToType {
set targetTypeCode [lindex $q 0 1]
return [quadcode::dataType::typeIntersect $targetTypeCode \
[typeOfOperand $types [lindex $q 2]]]
}
entry {
return $CALLFRAME
}
param {
if {[lindex $q 2 1] < [llength $ptype]} {
return [lindex $ptype [lindex $q 2 1]]
} else {
return $STRING
}
}
moveToCallFrame {
return [typeOfOperand $types [lindex $q 2]]
}
moveFromCallFrame {
return [expr {$NEXIST | $STRING}]
}
add -
mult -
sub {
set t1 [expr {[typeOfOperand $types [lindex $q 2]] & ~$IMPURE}]
set t2 [expr {[typeOfOperand $types [lindex $q 3]] & ~$IMPURE}]
if {istype($t1,$INT) && istype($t2,$INT)} {
# Surely not right in the presence of overflow, but keep
# until we decide what to do about overflows. Donal?
return $INT
} elseif {istype($t1,$ENTIER) && istype($t2,$ENTIER)} {
return $ENTIER
} elseif {istype($t1,$DOUBLE) || istype($t2,$DOUBLE)} {
return $DOUBLE
} else {
return $NUMERIC
}
}
div - expon {
set t1 [expr {[typeOfOperand $types [lindex $q 2]] & ~$IMPURE}]
set t2 [expr {[typeOfOperand $types [lindex $q 3]] & ~$IMPURE}]
if {istype($t1,$INT) && istype($t2,$INT)} {
# Surely not right in the presence of overflow, but keep
# until we decide what to do about overflows. Donal?
return [expr {$INT | $FAIL}]
} elseif {istype($t1,$ENTIER) && istype($t2,$ENTIER)} {
return [expr {$ENTIER | $FAIL}]
} elseif {istype($t1,$DOUBLE) || istype($t2,$DOUBLE)} {
return [expr {$DOUBLE | $FAIL}]
} else {
return [expr {$NUMERIC | $FAIL}]
}
}
mod {
return [expr {$INT | $FAIL}]
}
bitand -
bitnot -
bitor -
bitxor -
foreachIter -
lshift -
maptoint -
returnCode -
rshift -
strcmp -
strfind -
strlen -
strrfind {
return $INT
}
copy {
return [typeOfOperand $types [lindex $q 2]]
}
purify {
return [expr {[typeOfOperand $types [lindex $q 2]] & ~$IMPURE}]
}
unset {
return $NEXIST
}
initException {
return [expr {[typeOfOperand $types [lindex $q 2]] | $FAIL}]
}
extractMaybe {
return [expr {[typeOfOperand $types [lindex $q 2]] & ~$FAIL}]
}
extractExists {
return [expr {[typeOfOperand $types [lindex $q 2]] & ~$NEXIST}]
}
exists -
arrayExists -
dictExists -
foreachMayStep -
dictIterDone -
eq -
ge -
gt -
instanceOf -
isBoolean -
land -
le -
lor -
lt -
neq -
strclass -
streq -
strmatch -
strneq {
return $BOOL
}
not {
set t [typeOfOperand $types [lindex $q 2]]
if {istype($t,$NUMERIC)} {
return $BOOL
} else {
return [expr {$BOOL | $FAIL}]
}
}
regexp - listIn {
return [expr {$BOOL | $FAIL}]
}
listLength - dictSize {
return [expr {$INT | $FAIL}]
}
phi {
set r 0
foreach {from operand} [lrange $q 2 end] {
set r [expr {$r | [typeOfOperand $types $operand]}]
}
return $r
}
uminus - uplus {
set otype [typeOfOperand $types [lindex $q 2]]
set t1 [expr {[typeOfOperand $types [lindex $q 2]] & ~$IMPURE}]
if {istype($t1, $DOUBLE)} {
return $DOUBLE
} elseif {istype($t1, $INT)} {
return $INT
} elseif {istype($t1, $ENTIER)} {
return $ENTIER
} else {
return $NUMERIC
}
}
invoke {
# We know the result type of a handful of the things
# that might be invoked
if {[lindex $q 3 0] eq "literal"} {
set rtype [my typeOfInvoke [lindex $q 3 1] [lrange $q 4 end]]
} else {
set rtype [expr {$FAIL | $STRING}]
}
set inty [typeOfOperand $types [lindex $q 2]]
return [expr {($inty & $CALLFRAME) | $rtype}]
}
callFrameNop - startCatch {
return $CALLFRAME
}
nsupvar - upvar - variable {
return [expr {$CALLFRAME | $BOOL | $FAIL}]
}
retrieveResult {
# Pull from the callframe of the earlier 'invoke'
return [expr {[typeOfOperand $types [lindex $q 2]] & ~$CALLFRAME}]
}
extractCallFrame {
# Trim the non-callframe part
return $CALLFRAME
}
list - unshareList -
result - returnOptions -
dictIterKey - dictIterValue -
concat - strcat - strmap - strtrim - strcase {
return $STRING
}
foreachAdvance {
return $FOREACH
}
foreachStart {
return [expr {$FOREACH | $FAIL}]
}
strindex - strrange - strreplace -
listAppend - listConcat - listIndex - listSet -
dictSet - dictGet - listRange - dictUnset -
dictAppend - dictIncr - dictLappend {
return [expr {$STRING | $FAIL}]
}
dictIterStart {
return [expr {$DICTITER | $FAIL}]
}
dictIterNext {
return $DICTITER
}
initIfNotExists {
set vartype [typeOfOperand $types [lindex $q 2]]
set deftype [typeOfOperand $types [lindex $q 3]]
return [expr {$deftype | ($vartype & ~$NEXIST)}]
}
resolveCmd {
return $STRING
}
originCmd {
return [expr {$STRING | $FAIL}]
}
default {
error "Cannot infer type of result of $q"
}
}
}
# typeOfInvoke --
#
# Determines the data type of an invoked command, given the
# command name and args
#
# Parameters
# command - Fully qualified name of command being invoked
# argList - Arguments passed to the command
#
# Results:
# Returns a data type
oo::define quadcode::transformer method typeOfInvoke {command argList} {
namespace upvar ::quadcode::dataType \
DOUBLE DOUBLE FAIL FAIL INT INT NUMERIC NUMERIC \
STRING STRING ZEROONE ZEROONE
if {$specializer ne {}} {
set typeList [lmap arg $argList {
typeOfOperand $types $arg
}]
set retval [$specializer resultType $command $typeList]
return $retval
}
switch [lindex [builtinCommandType $command] 1] {
DOUBLE {
return $DOUBLE
}
INT {
return $INT
}
NUMERIC {
return $NUMERIC
}
BOOLEAN {
return $ZEROONE
}
default {
return [expr {$STRING | $FAIL}]
}
}
}
# typeOfOperand --
#
# Computes the type of an operation's operand
#
# Parameters:
# varTypes -- Data types inferred so far
# opd -- Operand to compute
proc quadcode::typeOfOperand {varTypes opd} {
switch -exact [lindex $opd 0] {
Nothing {
return $::quadcode::dataType::NEXIST
}
literal {
return [typeOfLiteral [lindex $opd 1]]
}
var -
temp {
if {[dict exists $varTypes $opd]} {
return [dict get $varTypes $opd]
} else {
return $::quadcode::dataType::NEXIST
}
}
default {
error "What is the type of $opd?"
}
}
}
# typeOfLiteral --
#
# Determines whether a literal represents a number, and returns
# its type.
#
# Parameters:
# value - Literal value
#
# Results:
# Returns one of the data types
proc quadcode::typeOfLiteral {x} {
if {$x eq {}} {
return $dataType::EMPTY
} elseif {[string is entier -strict $x]} {
set y [expr {entier($x)}]
if {$y eq $x} {
set impure 0
} else {
set impure $dataType::IMPURE
}
if {$x >= -0x80000000 && $x <= 0x7fffffff} {
if {$x == 0} {
return [dataType::typeUnion $dataType::CONST0 $impure]
} elseif {$x == 1} {
return [dataType::typeUnion $dataType::CONST1 $impure]
} else {
return [dataType::typeUnion $dataType::INT $impure]
}
} else {
return [dataType::typeUnion $dataType::ENTIER $impure]
}
} elseif {[string is double -strict $x]} {
set y [expr {double($x)}]
if {$y eq $x} {
return $dataType::DOUBLE
} else {
return [dataType::typeUnion $dataType::DOUBLE $dataType::IMPURE]
}
} elseif {[string is boolean -strict $x]} {
return [dataType::typeUnion $dataType::BOOLEAN $dataType::IMPURE]
} else {
return $dataType::IMPUREOTHERSTRING
}
}
# builtinCommandType -
#
# Describes what the prototypical type of a command should be.
#
# Parameters:
# commandName - The name of the command to get the type of, including
# such qualification as is available.
#
# Results:
# A two item list, where the first item is the type of the argument
# and the second argument is the type of the result. Types are
# described by a word such as INT or DOUBLE. Unrecognised commands
# give the empty list.
proc quadcode::builtinCommandType {commandName} {
switch [string trimleft $commandName :] {
tcl::mathfunc::acos - tcl::mathfunc::asin - tcl::mathfunc::atan -
tcl::mathfunc::ceil - tcl::mathfunc::cos - tcl::mathfunc::cosh -
tcl::mathfunc::exp - tcl::mathfunc::floor - tcl::mathfunc::log -
tcl::mathfunc::log10 - tcl::mathfunc::sin - tcl::mathfunc::sinh -
tcl::mathfunc::sqrt - tcl::mathfunc::tan - tcl::mathfunc::tanh {
return {DOUBLE DOUBLE}
}
tcl::mathfunc::double {
return {NUMERIC DOUBLE}
}
tcl::mathfunc::bool {
return {NUMERIC BOOLEAN}
}
tcl::mathfunc::entier - tcl::mathfunc::int - tcl::mathfunc::round -
tcl::mathfunc::wide {
return {NUMERIC INT}
}
tcl::mathfunc::atan2 - tcl::mathfunc::hypot - tcl::mathfunc::pow {
return {{DOUBLE DOUBLE} DOUBLE}
}
tcl::mathfunc::isqrt {
return {INT INT}
}
tcl::mathfunc::abs {
return {NUMERIC NUMERIC}
}
tcl::mathfunc::rand - tcl::mathfunc::srand {
return -code error "random numbers not currently supported"
}
}
}
# assignParameterTypes -
#
# Assigns types for the parameters of a procedure
#
# Results:
# None.
#
# Side effects:
# Updates types so that input variables have assigned types
# if type requirements are known.
#
# Constructs a dictionary 'ptype' containing just the parameter
# types.
#
# This procedure exist as scaffolding until the basic block representation
# is attached to the code issuer. It depends on 'flatten' already having
# reconstructed the quadcode.
oo::define quadcode::transformer method assignParameterTypes {types} {
namespace upvar ::quadcode::dataType VOID VOID STRING STRING
# If parameter types are assigned already, just return
if {$ptype ne {}} {
return $types
}
# Set parameter types according to 'my requiredInputType'
set ptype {}
set isParam {}
foreach q $quads {
switch -exact -- [lindex $q 0] {
entry {
}
param {
set v [lindex $q 1]
dict set types $v $STRING
dict set isParam $v {}
}
default {
foreach v [lrange $q 2 end] {
if {[dict exists $isParam $v]} {
set t [my requiredInputType $q $v]
set basetype $STRING
if {[dict exists $types $v]} {
set basetype [dict get $types $v]
}
set newtype [expr {$basetype & $t}]
dict set types $v $newtype
dict set ptype [lindex $v 1] $newtype
}
}
}
}
}
return $types
}
# requiredInputType -
#
# Determines a required type for a variable
#
# Parameters:
# q - Three address instruction
# v - Name of a variable appearing in the instruction's input list
#
# Results:
# Returns a type code
#
# Totally half-arsed implementation needed to get the LLVM connection going
oo::define quadcode::transformer method requiredInputType {q v} {
namespace upvar ::quadcode::dataType \
INT INT DOUBLE DOUBLE NUMERIC NUMERIC STRING STRING
switch -exact -- [lindex $q 0] {
invoke {
if {[lindex $q 2 0] eq "literal"
&& [llength [lindex $q 2]] < 3} {
switch [lindex [builtinCommandType [lindex $q 2 1]] 0 0] {
INT {
return $INT
}
DOUBLE {
return $DOUBLE
}
NUMERIC {
return $NUMERIC
}
default {
return $STRING
}
}
}
return $STRING
}
strindex {
if {$v eq [lindex $q 3]} {
return $INT
}
return $STRING
}
strrange - strreplace {
if {$v in [lrange $q 3 4]} {
return $INT
}
return $STRING
}
default {
return $STRING
}
}
}