# nodesplit.tcl --
#
# Code to update quadcode sequences by node splitting - to attempt
# to isolate paths that operate on different data types, particularly
# within loops.
#
# Copyright (c) 2017 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#
# Why do we want to do node splitting on quadcode sequences, and what do
# we expect the results to be?
#
# A program like:
#
# proc x {a b c} {
# set x 0
# for {set i $a} {$i < $b} {incr i $c} {
# set x [expr {$x + $i}]
# }
# return $x
# }
#
# gives rise to performance trouble at runtime. It turns into code like
# the following. For clarity, error handling code is omitted, and the
# dummy blocks that result from critical edge splitting (and eventually
# acquire type widening and memory management code) also are not shown.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: b2 := phi(b1{1}, b3{7}) // STRING <- STRING x IMPURE NUMERIC
# c2 := phi(c1{1}, c7{7}) // STRING <- STRING x IMPURE NUMERIC
# i2 := phi(a1{1}, i7{7}) // STRING <- STRING x NUMERIC
# x2 := phi(0{1}, x5a{7}) // NUMERIC <- ZERO x NUMERIC
# if i2 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: i3 := impure numeric(i2) // IMPURE NUMERIC <- STRING
# if b2 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := impure numeric(b2) // IMPURE NUMERIC <- STRING
# t4 := (i3 < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t3 is false goto 7
# goto 5
#
# 5: x5 = impure numeric(x2) // IMPURE NUMERIC <- STRING
# x5a := x5 + i3 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# if c2 is not numeric goto error3 // <- STRING
# goto 6
#
# 6: c6 := impure numeric(c2) // IMPURE NUMERIC <- STRING
# i6 := i3 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 2
#
# 7: return x2 // <- NUMERIC
#
#-----------------------------------------------------------------------------
#
# There are no fewer than three string-to-numeric conversions, all
# inside the loop. What's more, the three corresponding phi operations promote
# numerics back to strings. What we need is some way to make the string
# conversions happen on only the first and last iterations.
#
# The way we approach this is to do successive block splitting. Whenever
# we encounter values of incompatible types (defined as being a STRING
# and a non-STRING, or an IMPURE object and a pure one, or objects of
# two non-STRING types that will combine to STRING), we know that we likely
# can improve code by splitting the block. This process will unroll at least
# part of the loop, hoisting out the data conversions.
#
# Note that if we are splitting a multi-exit block, we must introduce
# additional dummy blocks because each exit will become a critical edge.
#
# Generally speaking, splitting blocks will give rise to many more
# opportunities to split. We continue until nothing can be split further.
# (There may be a need for a safety check to avoid infinite splits in
# certain pathological cases.)
#
# Let's see what happens as we do this. After one split, we see:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: b3:= phi(b1{2}, b4{8}) // STRING <- STRING x IMPURE NUMERIC
# c3 := phi(c1{2}, c6{8}) // STRING <- STRING x IMPURE NUMERIC
# i3 := phi(a1{2}, i6{8}) // STRING <- STRING x NUMERIC
# x3 := phi(0{2}, x5a{8}) // NUMERIC <- ZERO x NUMERIC
# i3a := impure numeric(i3) // IMPURE NUMERIC <- STRING
# if b3 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := impure numeric(b3) // IMPURE NUMERIC <- STRING
# t4 := (i3a < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t4 is false goto 7
# goto 5
#
# 5: x5 = impure numeric(x3) // IMPURE NUMERIC <- STRING
# x5a := x5 + i3a // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# if c3 is not numeric goto error3 // <- STRING
# goto 6
#
# 6: c6 := impure numeric(c3) // IMPURE NUMERIC <- STRING
# i6 := i3a + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
# 7: return x3 // <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
# goto 3
#
#-----------------------------------------------------------------------------
#
# (There will also be phi's at the start of 'error0'. All error handling is
# omitted for clarity.)
#
# Note that the test in block 8 will always be false, and the existing
# optimizations in 'doTypeChecks' will eliminate it, and the block. But I
# defer reapplying the existing optimizations until all block splitting is
# complete.
#
# Block 3 now becomes a candidate for splitting. After the split, we have:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: i3 := impure numeric(a1) // IMPURE NUMERIC <- STRING
# if b1 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := phi(b1{3}, b4a{9}) // STRING <- STRING x IMPURE NUMERIC
# c4 := phi(c1{3}, c7{9}) // STRING <- STRING x IMPURE NUMERIC
# i4 := phi(i3{3}, i9{9}) // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
# x4 := phi(0{3}, x6a{9}) // NUMERIC <- ZERO x NUMERIC
# b4a := impure numeric(b4) // IMPURE NUMERIC <- STRING
# t4 := (i4 < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t4 is false goto 7
# goto 5
#
# 5: x5 = impure numeric(x4) // IMPURE NUMERIC <- STRING
# x5a := x5 + i4 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# if c4 is not numeric goto error3 // <- STRING
# goto 6
#
# 6: c6 := impure numeric(c4) // IMPURE NUMERIC <- STRING
# i6 := i4 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
# 7: return x4 // <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
# goto 10
#
# 9: i9 = impure numeric(i7) // NUMERIC <- NUMERIC
# if b4a is not numeric goto error1 // <- IMPURE NUMERIC
# goto 4
#-----------------------------------------------------------------------------
#
# The whole of block 9 consists of redundant operations, which will be
# eliminated. More importantly, the test on a1 has been moved out of
# the loop and will be performed only once. We keep on splitting, hitting
# block 4 next. Phi operations are needed in blocks 5 and 7.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: i3 := impure numeric(a1) // IMPURE NUMERIC <- STRING
# if b1 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := impure numeric(b1) // IMPURE NUMERIC <- STRING
# t4 := (i3 < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t4 is false goto 7
# goto 5
#
# 5: b5 := phi(b4{4}, b10{10}) // STRING <- STRING x IMPURE NUMERIC
# c5 := phi(c1{4}, c7{10}) // STRING <- STRING x IMPURE NUMERIC
# i5 := phi(i3{4}, i9{10}) // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
# x5 := phi(0{4}, x6a{10}) // NUMERIC <- ZERO x NUMERIC
# x5a := x5 + i5 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# if c5 is not numeric goto error3 // <- STRING
# goto 6
#
# 6: c6 := impure numeric(c5) // IMPURE NUMERIC <- STRING
# i6 := i5 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
# 7: x7 := phi(0{4}, x5a{10}) // NUMERIC <- ZERO x NUMERIC
# return x7 // <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
# goto 9
#
# 9: i9 = impure numeric(i6) // NUMERIC <- NUMERIC
# if b5 is not numeric goto error1 // <- IMPURE NUMERIC
# goto 10
#
# 10: b10 := impure numeric(b5) // IMPURE NUMERIC <- IMPURE NUMERIC
# t10 := (i6a < b5) // ZEROONE <- NUMERIC x IMPURE NUMERIC
# if t10 is false goto 7
# goto 5
#
#-----------------------------------------------------------------------------
#
# Another expensive type conversion moved out of the loop, and block 11
# begins with an instruction that will be eliminated in optimization.
# Onward to blocks 5 and 6!
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: i3 := impure numeric(a1) // IMPURE NUMERIC <- STRING
# if b1 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := impure numeric(b1) // IMPURE NUMERIC <- STRING
# t4 := (i3 < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t4 is false goto 7
# goto 5
#
# 5: x5 := 0 + i3 // NUMERIC <- ZERO x IMPURE NUMERIC
# if c1 is not numeric goto error3 // <- STRING
# goto 6
#
# 6: c6 := impure numeric(c1) // IMPURE NUMERIC <- STRING
# i6 := i3 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
# 7: x7 := phi(0{4}, x5{10}) // NUMERIC <- ZERO x NUMERIC
# return x7 // <- NUMERIC
#
# 8: b6 := phi(b4{6}, b10{12})
# // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# c8 := phi(c6{6}, c12{12}) // IMPURE NUMERIC x IMPURE NUMERIC
# // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# i8 := phi(i6{6}, i9{11}) // NUMERIC <- NUMERIC x NUMERIC
# x8 := phi(x5{5}, x11{11}) // NUMERIC <- ZERO x NUMERIC
# if i8 is not numeric goto error0 // <- NUMERIC
# goto 9
#
# 9: i9 = impure numeric(i6a) // NUMERIC <- NUMERIC
# if b6 is not numeric goto error1 // <- IMPURE NUMERIC
# goto 10
#
# 10: b10 := impure numeric(b6) // IMPURE NUMERIC <- IMPURE NUMERIC
# t10 := (i6a < b6) // ZEROONE <- NUMERIC x IMPURE NUMERIC
# if t10 is false goto 7
# goto 11
#
# 11: x11 := x8 + i8 // NUMERIC <- NUMERIC x NUMERIC
# if c6a is not numeric goto error3 // <- IMPURE NUMERIC
# goto 12
#
# 12: c12 = impure numeric(c8) // IMPURE NUMERIC <- IMPURE NUMERIC
# i12 := i8 + c8 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
#-----------------------------------------------------------------------------
#
# There's nothing left to split, because now all the data types at phi's
# are consistent, but there are a lot of tautologies to eliminate,
# useless type conversions to discard, and similar rubbish. After running
# the optimizations from other modules, the code will look something like:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0] // STRING <- STRING
# b1 := args[1] // STRING <- STRING
# c1 := args[2] // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
# goto 3
#
# 3: i3 := impure numeric(a1) // IMPURE NUMERIC <- STRING
# if b1 is not numeric goto error1 // <- STRING
# goto 4
#
# 4: b4 := impure numeric(b1) // IMPURE NUMERIC <- STRING
# t4 := (i3 < b4) // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
# if t4 is false goto 7
# goto 5
#
# 5: if c1 is not numeric goto error3 // <- STRING
# x5 = 0 + i3 // NUMERIC <- ZERO + IMPURE NUMERIC
# goto 6
#
# 6: c6 := impure numeric(c1) // IMPURE NUMERIC <- STRING
# i6 := i3 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
# 7: x7 := phi(0{4}, x5{10}) // NUMERIC <- ZERO x NUMERIC
# return x7 // <- NUMERIC
#
# 8: i8 := phi(i6{6}, i9{9}) // NUMERIC <- NUMERIC x NUMERIC
# x8 := phi(a1{6}, x11{11}) // NUMERIC <- ZERO x NUMERIC
# t8 := (i8 < b4) // ZEROONE <- NUMERIC x IMPURE NUMERIC
# if t8 is false goto 7
# goto 9
#
# 9: x9 := x8 + i6a // NUMERIC <- NUMERIC x NUMERIC
# i9 := i8 + c6 // NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
# goto 8
#
#-----------------------------------------------------------------------------
#
# A tightly optimized inner loop has emerged, once constant branches and
# do-nothing type conversions have been eliminated. All the pesky string
# representations are gone from the intermediate results. A few odds and
# ends of the first trip through the loop have distributed themselves
# elsewhere in the code.
#
# Now, how is this basic block splitting
#
# Essentially, it's a copying operation. We remove the 'jump'
# instruction at the end of each predecessor of the block being split,
# and replace it with the body of the split block. (All the predecessor blocks
# are single-exit, because we've done critical edge splitting.)
#
# We replace the phi instructions in the block with copies from the
# source operands. We track all the variables defined in the split
# code, since they now have multiple assignments to them, violating
# SSA. We add uses to the du-chains of all the variables used in the
# split code.
#
# Once the splitting is done, the first thing that we have to do is to
# make sure that there are no critical edges. Critical edges will have
# been introduced any time that we split a block that has multiple
# exits. We remove them by introducing a new, empty, basic block on each
# of the exits from the new block.
#
# The original block is now unreachable code. All variable uses
# in it are discarded, the block itself is emptied, and 'bbpred' is
# cleansed of references to it, both from it to its predecessors and from
# its successors to it.
#
# These manipulations have destroyed the dominance hierarchy, and this
# must be repaired. Currently, it is rebuilt wholesale. It would most
# likely be possible to constrain the rebuilding, using the ideas in
#
# Vugranam C. Sreedhar, Guang R. Gao and Yong-fong Lee. "Incremental
# Computation of Dominator Trees." ACM Trans. Progrm. Languages and
# Systems, 1995, pp. 1-12.
# http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.46.3846
#
# It might also be possible to avoid use of the dominator tree during
# this operation - see Section 5.4 of "SSA-based Compiler Design"
# (http://ssabook.gforge.inria.fr/latest/book.pdf) for how this might
# be carried out.
#
# We now have a program where the control flows are again correct, and
# the dominance relations are understood. The program is,
# nevertheless, not in SSA form. The violation of SSA is well
# characterized. We have available to us a set of variables that are
# assigned at multiple places, and the places where they are
# assigned. We can use a 'black box' SSA reconstruction algorithm on
# those variables to repair SSA.
#
# Finally, type analysis has to be repeated. Once again, this is
# currently done wholesale. It might be possible to constrain it to
# the newly-split variables and their phi-webs.
# There are algorithms known for repair of the dominance relations as
# edges are cut and spliced. This implementation more or less follows
# Vugranam C. Sreedhar, Guang R Gao, and Yong-Fong Lee; "Incremental
# Computation of Dominator Trees." Proc. ACM SIGPLAN Workshop on
# Intermediate Representations. San Francisco, Calif.:ACM, 1995,
# pp. 1-12.
#
# You have been warned.
#
#-----------------------------------------------------------------------------
# quadcode::transformer method insertSplitMarkers --
#
# Inserts markers into the code indicating the original basic
# blocks prior to any node splitting.
#
# Results:
# None.
#
# Side effects:
# Every basic block has a 'split' instruction added at its head.
# The 'split' has no result, and its sole source operand is a literal
# giving the original basic block number.
oo::define quadcode::transformer method insertSplitMarkers {} {
my variable bbcontent
set b -1
foreach bb $bbcontent {
incr b
lset bbcontent $b {}
set pc -1
foreach q $bb {
incr pc
if {[lindex $q 0 0] ni {"entry" "phi" "param"}} {
break
}
}
lset bbcontent $b [linsert $bb[set bb ""] $pc \
[list split {} [list literal $b]]]
}
return
}
# quadcode::transformer method countSplits --
#
# Walks through the quadcode, counting the number of split markers
# present for each original location in the code.
#
# Parameters:
# None.
#
# Results:
# Returns a list, indexed by the original basic block number,
# where the values are the number of copies of the original block that
# are present. Zero counts are possible and expected if the original
# code has been optimized away entirely.
oo::define quadcode::transformer method countSplits {} {
my variable bbcontent
set r {}
set b -1
foreach bb $bbcontent {
incr b
foreach q $bb {
if {[lindex $q 0 0] eq "split"} {
set origb [lindex $q 2 1]
while {$origb >= [llength $r]} {
lappend r 0
}
set n [lindex $r $origb]
incr n
lset r $origb $n
}
}
}
return $r
}
# quadcode::transformer method removeSplitMarkers --
#
# Removes markers that indicate the source of code when code splitting
#
# Results:
# None
#
# Side effects:
# Split markers are removed.
oo::define quadcode::transformer method removeSplitMarkers {} {
my variable bbcontent
set b -1
foreach bb $bbcontent {
incr b
set newbb {}
foreach q $bb {
if {[lindex $q 0] ne "split"} {
lappend newbb $q
}
}
lset bbcontent $b $newbb
}
return
}
# quadcode::transformer method nodesplit --
#
# Attempts to factor type checking out of loops by splitting a
# single node of the flow graph.
#
# Results:
# Return 1 if a node was split, 0 if no splittable node was found.
#
# Side effects:
# Major surgery on the program, that requires cleanup optimization
# and type inference (including possible repairs to interprocedural
# type inference).
oo::define quadcode::transformer method nodesplit {} {
# global splitcount
# if {[incr splitcount] > 10} {puts "$splitcount splits"; return 0}
my variable ns_counters
# Determine how many copies of each original basic block are present
set ns_counters [my countSplits]
my debug-nodesplit {
puts "Before node splitting:"
my dump-bb
}
# Walk the basic blocks in order, looking for things that could
# be improved by splitting
foreach b [my bborder] {
if {[my ns_splittable $b]} {
# Split a single basic block
my ns_cloneBB $b
my debug-nodesplit {
puts "After splitting:"
my dump-bb
}
my audit-phis "one split"
return 1
}
}
# Found nothing to split
return 0
}
# quadcode::transformer method ns_splittable --
#
# Determines whether a basic block is a candidate for splitting
#
# Parameters:
# b - Basic block number of the block being examined
#
# Results:
# Returns 1 if the block should be split, 0 otherwise.
#
# A basic block may be split if it contains a phi where 'substantially
# different' types arrive, and it does not begin with an indication that
# it has been split already.
oo::define quadcode::transformer method ns_splittable {b} {
my variable bbcontent
my variable ns_counters
my variable types
set bbSplitLimit 8
namespace upvar ::quadcode::dataType \
EMPTY EMPTY IMPURE IMPURE NUMERIC NUMERIC OTHERSTRING OTHERSTRING \
BOOLEAN BOOLEAN
set bb [lindex $bbcontent $b]
set pc -1
foreach q $bb {
incr pc
if {[lindex $q 0] ne "phi"} {
# We've come to the end of the phi operations, without finding
# a phi that would make the block splittable.
return 0
}
# $q is a phi operation in the block. Test whether the types of its
# args are substantially different.
set v1 [lindex $q 3]
set t1 [typeOfOperand $types $v1]
if {$t1 == 0} {
# Sanity check - die on mislinked variables
error "NOTHING ends up in $v1 at $b:$pc: $q"
}
set splitThisPhi 0
foreach {origin v2} [lrange $q 4 end] {
set splitThisPhi 0
set t2 [typeOfOperand $types $v2]
if {$t2 == 0} {
# Sanity check - die on mislinked variables
error "NOTHING ends up in $v2 at $b:$pc: $q"
}
# Split if one value is IMPURE, EMPTY or OTHERSTRING,
# while the other value is none of the above.
set expensive1 \
[expr {($t1 & ($IMPURE | $EMPTY | $OTHERSTRING)) != 0}]
set expensive2 \
[expr {($t2 & ($IMPURE | $EMPTY | $OTHERSTRING)) != 0}]
if {$expensive1 ^ $expensive2} {
set splitThisPhi 1
break
}
# Split if one value is OTHERSTRING and the other is
# some sort of IMPURE NUMERIC or IMPURE BOOLEAN
if {($t1 & $OTHERSTRING)
&& [::quadcode::dataType::isa $t2 \
[expr {$IMPURE | $NUMERIC | $BOOLEAN}]]} {
set splitThisPhi 1
break
}
if {($t2 & $OTHERSTRING)
&& [::quadcode::dataType::isa $t1 \
[expr {$IMPURE | $NUMERIC | $BOOLEAN}]]} {
set splitThisPhi 1
break
}
}
if {$splitThisPhi} {
set phi $q
my debug-nodesplit {
puts "$b:$pc: $q"
puts [format "\tmight be a candidate for node splitting\
%s = %#llx %s" \
$v1 $t1 [nameOfType $t1]]
if {$t1 < 0} {
puts " ~[nameOfType [expr {~$t1}]]"
}
puts [format "%s = %#llx %s" \
$v2 $t2 [nameOfType $t2]]
if {$t2 < 0} {
puts " ~[nameOfType [expr {~$t2}]]"
}
}
# We will add one more instance of the block for every
# predecessor, but delete one for the current instance
set delta [expr {[llength $phi]/2 - 2}]
set PC -1
# Make sure that the block hasn't been split too often already
while {[incr pc] < [llength $bb]} {
set q [lindex $bb $pc]
if {[lindex $q 1 0] eq "bb"} {
break
}
if {[lindex $q 0] eq "split"} {
set splitFrom [lindex $q 2 1]
set count [lindex $ns_counters $splitFrom]
if {$count + $delta > $bbSplitLimit} {
my debug-nodesplit {
puts "$b:$pc: $splitFrom has been split too many\
times already."
}
return 0
}
}
}
my debug-nodesplit {
puts "split block $b: [llength $bb] quads and\
[expr {([llength $phi] - 1)/2}] predecessors"
}
return 1
} else {
my debug-nodesplit {
puts "$b:$pc: $q"
puts "\tis not a candidate for node splitting"
puts " $v1 = $t1 [nameOfType $t1]"
if {$t1 < 0} {
puts " ~[nameOfType [expr {~$t1}]]"
}
puts " $v2 = $t2 [nameOfType $t2]"
if {$t2 < 0} {
puts " ~[nameOfType [expr {~$t2}]]"
}
}
}
}
error "fell off the end of a basic block"
}
# quadcode::transformer method ns_cloneBB --
#
# Clones a basic block that has been identified as a candidate
# for splitting.
#
# Parameters:
# b - Basic block number of the block being cloned
#
# Results:
# None.
#
# Side effects:
# Copies the block onto the end of its successors, and adds
# edges, blocks for critical edge splitting, and phi nodes as
# necessary to have the code in two places instead of one.
#
# The block must be multiple entry, therefore all its predecessors
# are single exit. That's the key concept that makes this splitting
# possible - that we can hoist the code up into the predecessors.
oo::define quadcode::transformer method ns_cloneBB {b} {
my variable bbcontent
my variable bbpred
my debug-nodesplit {
puts "Cloning and eliminating basic block $b:"
set bb [lindex $bbcontent $b]
set pc -1
foreach q $bb {
puts " $b:[incr pc]: $q"
}
puts "------------------------------"
}
# dups is a three-level dictionary. The first key is the name of
# a variable whose assignment has been duplicated. The second-level
# key is the number of the basic block where it now appears, and
# the content is the number of assignments to the variable that appear
# in the block.
set dups {}
# Copy the block's instructions into its predecessors. Split critical
# edges if the block has more than one exit.
set ps [lindex $bbpred $b]
dict for {pred -} $ps {
my ns_rewriteBlockOntoPred dups $b $pred
}
# If the block had multiple successors, its successors are all
# single entry and free of phi nodes. If it had a single successor,
# then any phi's in the successor need to have it removed and the
# clones inserted.
set ss [my bbsucc $b]
if {[llength $ss] eq 1} {
my ns_fixupPhis [lindex $ss 0] $b $ps
}
# Unlink du-chains from the split block, and remove its
# predecessor and successor links. Fixup bbidom, bbkids, and bblevel
# to reflect the new state of the DJ graph.
my ns_destroyBB $b
my bbidom
my bblevel
# Repair SSA
my debug-nodesplit {
my variable duchain
puts "Repair SSA after node splitting"
dict for {v defs} $dups {
puts " $v: [dict keys $defs] -> [dict keys [dict get $duchain $v]]"
}
my dump-bb
}
dict for {v defs} $dups {
my repairSSAVariable $v $defs
}
}
# quadcode::transformer method ns_rewriteBlockOnPred --
#
# Copies the instructions of the body of a basic block
# (which must be two-entry) onto the end of a predecessor,
# renaming all assigned variables.
#
# Parameters:
# dupsv - Name of a variable in the callers scope that holds a two-level
# dictionary tracking the duplicated assignment statements. The
# first-level keys are basic block numbers; the second-level
# keys are basic blocks where the variables are assigned, and
# the values are counts of assignments within the blocks.
# b - Number of the basic block to copy
# pred - Number of the predecessor block
#
# Results:
# None.
#
# Side effects:
# Copies the code, and updates du-chains of variables appearing in
# instructions being copied. Updates 'dupsv' with the locations of
# variable definitions in the copies. Splits critical edges if
# they are being introduced.
oo::define quadcode::transformer method ns_rewriteBlockOntoPred {dupsv b pred} {
upvar 1 $dupsv dups
my variable bbcontent
my variable bbpred
# Look up the code we're copying and the code that we're appending to.
set bb [lindex $bbcontent $b]
set pb [lindex $bbcontent $pred]
lset bbcontent $pred {}
set pb [lreplace $pb[set pb {}] end end]
my debug-nodesplit {
puts "Want to copy basic block $b into predecessor $pred"
set pc -1
foreach q $bb {
puts " $b:[incr pc]: $q"
}
puts " ------- "
set pc -1
foreach q $pb {
puts " $pred:[incr pc]: $q"
}
puts " +++++++ "
}
# Iterate through the instructions being copied
set key [list bb $pred]
set pc -1
set critical 0
foreach q $bb {
incr pc
set res [lindex $q 1]
# If we're copying a phi, replace it with a copy from the appropriate
# source variable.
set op [lindex $q 0 0]
switch -exact -- $op {
phi {
set repvar [dict get $q $key]
if {$repvar eq "Nothing"} {
set q [list unset $res]
} else {
set q [list copy $res $repvar]
}
}
}
# Examine the result of the operation. If the result is a value,
# add the copy to the dictionary of multiple assignments that will
# need SSA repair. If the operation is a conditional jump, peform
# critical edge splitting. On any jump, update the 'bbpred' link.
switch -exact -- [lindex $res 0] {
"temp" - "var" {
if {[dict exists $dups $res]} {
set d [dict get $dups $res]
dict set dups $res {}
} else {
set d {}
}
dict incr d $pred
dict set dups $res $d
}
"bb" {
if {[lindex $q 0] ne "jump"} {
set critical 1
}
if {$critical} {
set tgt [my makeEmptyBB [lindex $res 1]]
my debug-nodesplit {
puts " (* $tgt:0: [list jump $res] *)"
}
set res [list bb $tgt]
lset q 1 $res
}
my bblink $pred [lindex $res 1]
}
}
# Add du-chaining for the variable references in the copied code
foreach opd [lrange $q 2 end] {
if {[lindex $opd 0] in {"temp" "var"}} {
my addUse $opd $pred
}
}
# Copy the quad to its new home
my debug-nodesplit {
puts " $pred:[llength $pb]: $q"
}
lappend pb $q
}
lset bbcontent $pred $pb
return
}
# ns_fixupPhis --
#
# Repairs phi operations in a successor block after splitting a
# basic block.
#
# Parameters:
# b - Successor block
# orig - Original block that was split
# clones - Blocks into which the code was cloned.
#
# Results:
# None.
#
# Side effects:
# Phi operations are rewritten
oo::define quadcode::transformer method ns_fixupPhis {b orig clones} {
my variable bbcontent
my debug-nodesplit {
puts " redirect phi operations in $b that refer to $orig to use\
[dict keys $clones] instead"
}
set bb [lindex $bbcontent $b]
lset bbcontent $b {}
set key [list bb $orig]
set repls [lmap {c -} $clones {list bb $c}]
set pc -1
foreach q $bb {
if {[lindex $q 0] ne "phi"} {
break
}
incr pc
set newq {}
dict for {from fromvar} $q {
if {$from ne $key} {
lappend newq $from $fromvar
} else {
foreach newfrom $repls {
lappend newq $newfrom $fromvar
}
}
}
my debug-nodesplit {
puts " $b:$pc: redirected: $newq"
}
lset bb $pc $newq
}
lset bbcontent $b $bb
return
}
# ns_destroyBB --
#
# Destroys a basic block after node splitting has replaced it with
# one or more new instances.
#
# Parameters:
# b - Basic block number to be destroyed
#
# Results:
# None.
#
# Side effects:
# All values defined in the block have their du-chains unlinked.
# The block is erased from 'bbcontent' and 'bbpred'.
#
# The next time that 'deadcode' runs, the last vestiges of the block,
# which is now unreachable, will be eliminated entirely.
oo::define quadcode::transformer method ns_destroyBB {b} {
my variable bbcontent
my variable bbpred
# Unlink du-chains
set bb [lindex $bbcontent $b]
set key [list bb $b]
foreach q $bb {
foreach opd [lrange $q 2 end] {
if {[lindex $opd 0] in {"var" "temp"}} {
my removeUse $opd $b
}
}
}
# Unlink predecessor relationships
foreach s [my bbsucc $b] {
set ps [lindex $bbpred $s]
lset bbpred $s {}
dict unset ps $b
lset bbpred $s $ps
}
# Remove the block from 'bbpred' and 'bbcontent'
set preds [lindex $bbpred $b]
lset bbpred $b {}
lset bbcontent $b {}
}