Artifact [6df1070862]

Artifact 6df107086221831725896e6ca3eb9cc11bc98ad0:


# nodesplit.tcl --
#
#	Code to update quadcode sequences by node splitting - to attempt
#	to isolate paths that operate on different data types, particularly
#	within loops.
#
# Copyright (c) 2017 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------

#-----------------------------------------------------------------------------
#
# Why do we want to do node splitting on quadcode sequences, and what do
# we expect the results to be?
#
# A program like:
#
#	proc x {a b c} {
#	    set x 0
#	    for {set i $a} {$i < $b} {incr i $c} {
#               set x [expr {$x + $i}]
#           }
#	    return $x
#       }
#
# gives rise to performance trouble at runtime. It turns into code like
# the following. For clarity, error handling code is omitted, and the
# dummy blocks that result from critical edge splitting (and eventually
# acquire type widening and memory management code) also are not shown.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: b2 := phi(b1{1}, b3{7})    // STRING <- STRING x IMPURE NUMERIC
#    c2 := phi(c1{1}, c7{7})	// STRING <- STRING x IMPURE NUMERIC
#    i2 := phi(a1{1}, i7{7})    // STRING <- STRING x NUMERIC
#    x2 := phi(0{1}, x5a{7})	// NUMERIC <- ZERO x NUMERIC
#    if i2 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(i2)	// IMPURE NUMERIC <- STRING
#    if b2 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b2)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t3 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x2)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i3		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c2 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c2)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 2
#
# 7: return x2			// <- NUMERIC
#
#-----------------------------------------------------------------------------
#
# There are no fewer than three string-to-numeric conversions, all
# inside the loop. What's more, the three corresponding phi operations promote
# numerics back to strings. What we need is some way to make the string
# conversions happen on only the first and last iterations.
#
# The way we approach this is to do successive block splitting. Whenever
# we encounter values of incompatible types (defined as being a STRING
# and a non-STRING, or an IMPURE object and a pure one, or objects of
# two non-STRING types that will combine to STRING), we know that we likely
# can improve code by splitting the block. This process will unroll at least
# part of the loop, hoisting out the data conversions.
#
# Note that if we are splitting a multi-exit block, we must introduce
# additional dummy blocks because each exit will become a critical edge.
#
# Generally speaking, splitting blocks will give rise to many more
# opportunities to split. We continue until nothing can be split further.
# (There may be a need for a safety check to avoid infinite splits in
# certain pathological cases.)
#
# Let's see what happens as we do this. After one split, we see:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: b3:= phi(b1{2}, b4{8})    // STRING <- STRING x IMPURE NUMERIC
#    c3 := phi(c1{2}, c6{8})	// STRING <- STRING x IMPURE NUMERIC
#    i3 := phi(a1{2}, i6{8})    // STRING <- STRING x NUMERIC
#    x3 := phi(0{2}, x5a{8})	// NUMERIC <- ZERO x NUMERIC
#    i3a := impure numeric(i3)	// IMPURE NUMERIC <- STRING
#    if b3 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b3)	// IMPURE NUMERIC <- STRING
#    t4 := (i3a < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x3)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i3a		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c3 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c3)	// IMPURE NUMERIC <- STRING
#    i6 := i3a + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: return x3			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 3
#
#-----------------------------------------------------------------------------
#
# (There will also be phi's at the start of 'error0'. All error handling is
# omitted for clarity.)
#
# Note that the test in block 8 will always be false, and the existing
# optimizations in 'doTypeChecks' will eliminate it, and the block. But I
# defer reapplying the existing optimizations until all block splitting is
# complete.
#
# Block 3 now becomes a candidate for splitting. After the split, we have:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := phi(b1{3}, b4a{9})   // STRING <- STRING x IMPURE NUMERIC
#    c4 := phi(c1{3}, c7{9})	// STRING <- STRING x IMPURE NUMERIC
#    i4 := phi(i3{3}, i9{9})    // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
#    x4 := phi(0{3}, x6a{9})	// NUMERIC <- ZERO x NUMERIC
#    b4a := impure numeric(b4)	// IMPURE NUMERIC <- STRING
#    t4 := (i4 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x4)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i4		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c4 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c4)	// IMPURE NUMERIC <- STRING
#    i6 := i4 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: return x4			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 10
#
# 9: i9 = impure numeric(i7)         // NUMERIC <- NUMERIC
#    if b4a is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 4
#-----------------------------------------------------------------------------
#
# The whole of block 9 consists of redundant operations, which will be
# eliminated. More importantly, the test on a1 has been moved out of
# the loop and will be performed only once. We keep on splitting, hitting
# block 4 next. Phi operations are needed in blocks 5 and 7.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: b5 := phi(b4{4}, b10{10})  // STRING <- STRING x IMPURE NUMERIC
#    c5 := phi(c1{4}, c7{10})	// STRING <- STRING x IMPURE NUMERIC
#    i5 := phi(i3{4}, i9{10})   // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
#    x5 := phi(0{4}, x6a{10})	// NUMERIC <- ZERO x NUMERIC
#    x5a := x5 + i5		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c5 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c5)	// IMPURE NUMERIC <- STRING
#    i6 := i5 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5a{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 9
#
# 9: i9 = impure numeric(i6)         // NUMERIC <- NUMERIC
#    if b5 is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 10
#
# 10: b10 := impure numeric(b5)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     t10 := (i6a < b5)            // ZEROONE <- NUMERIC x IMPURE NUMERIC
#     if t10 is false goto 7
#     goto 5
#
#-----------------------------------------------------------------------------
#
# Another expensive type conversion moved out of the loop, and block 11
# begins with an instruction that will be eliminated in optimization.
# Onward to blocks 5 and 6!
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 := 0 + i3		// NUMERIC <- ZERO x IMPURE NUMERIC
#    if c1 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c1)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: b6 := phi(b4{6}, b10{12})
#                        // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    c8 := phi(c6{6}, c12{12})	// IMPURE NUMERIC x IMPURE NUMERIC
#                        // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    i8 := phi(i6{6}, i9{11})   //  NUMERIC <-  NUMERIC x NUMERIC
#    x8 := phi(x5{5}, x11{11})	// NUMERIC <- ZERO x NUMERIC
#    if i8 is not numeric goto error0 // <- NUMERIC
#    goto 9
#
# 9: i9 = impure numeric(i6a)   // NUMERIC <- NUMERIC
#    if b6 is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 10
#
# 10: b10 := impure numeric(b6)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     t10 := (i6a < b6)         // ZEROONE <- NUMERIC x IMPURE NUMERIC
#     if t10 is false goto 7
#     goto 11
#
# 11: x11 := x8 + i8		// NUMERIC <- NUMERIC x NUMERIC
#     if c6a is not numeric goto error3 // <- IMPURE NUMERIC
#     goto 12
#
# 12: c12 = impure numeric(c8)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     i12 := i8 + c8		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#     goto 8
#
#-----------------------------------------------------------------------------
#
# There's nothing left to split, because now all the data types at phi's
# are consistent, but there are a lot of tautologies to eliminate,
# useless type conversions to discard, and similar rubbish. After running
# the optimizations from other modules, the code will look something like:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: if c1 is not numeric goto error3 // <- STRING
#    x5 = 0 + i3		// NUMERIC <- ZERO + IMPURE NUMERIC
#    goto 6
#
# 6: c6 := impure numeric(c1)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: i8 := phi(i6{6}, i9{9})    // NUMERIC <- NUMERIC x NUMERIC
#    x8 := phi(a1{6}, x11{11})	// NUMERIC <- ZERO x NUMERIC
#    t8 := (i8 < b4)            // ZEROONE <- NUMERIC x IMPURE NUMERIC
#    if t8 is false goto 7
#    goto 9
#
# 9: x9 := x8 + i6a		// NUMERIC <- NUMERIC x NUMERIC
#    i9 := i8 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
#-----------------------------------------------------------------------------
#
# A tightly optimized inner loop has emerged, once constant branches and
# do-nothing type conversions have been eliminated. All the pesky string
# representations are gone from the intermediate results. A few odds and
# ends of the first trip through the loop have distributed themselves
# elsewhere in the code. 
#
# Now, how is this basic block splitting
#
# Essentially, it's a copying operation. We remove the 'jump'
# instruction at the end of each predecessor of the block being split,
# and replace it with the body of the split block. (All the predecessor blocks
# are single-exit, because we've done critical edge splitting.)
#
# We replace the phi instructions in the block with copies from the
# source operands. We track all the variables defined in the split
# code, since they now have multiple assignments to them, violating
# SSA. We add uses to the du-chains of all the variables used in the
# split code.
#
# Once the splitting is done, the first thing that we have to do is to
# make sure that there are no critical edges. Critical edges will have
# been introduced any time that we split a block that has multiple
# exits. We remove them by introducing a new, empty, basic block on each
# of the exits from the new block.
#
# The original block is now unreachable code. All variable uses
# in it are discarded, the block itself is emptied, and 'bbpred' is
# cleansed of references to it, both from it to its predecessors and from
# its successors to it.
#
# These manipulations have destroyed the dominance hierarchy, and this
# must be repaired. Currently, it is rebuilt wholesale. It would most
# likely be possible to constrain the rebuilding, using the ideas in
#
# Vugranam C. Sreedhar, Guang R. Gao and Yong-fong Lee. "Incremental
# Computation of Dominator Trees." ACM Trans. Progrm. Languages and
# Systems, 1995, pp. 1-12.
# http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.46.3846
#
# It might also be possible to avoid use of the dominator tree during
# this operation - see Section 5.4 of "SSA-based Compiler Design"
# (http://ssabook.gforge.inria.fr/latest/book.pdf) for how this might
# be carried out.
#
# We now have a program where the control flows are again correct, and
# the dominance relations are understood. The program is,
# nevertheless, not in SSA form. The violation of SSA is well
# characterized. We have available to us a set of variables that are
# assigned at multiple places, and the places where they are
# assigned. We can use a 'black box' SSA reconstruction algorithm on
# those variables to repair SSA.
#
# Finally, type analysis has to be repeated. Once again, this is
# currently done wholesale. It might be possible to constrain it to
# the newly-split variables and their phi-webs.
# There are algorithms known for repair of the dominance relations as
# edges are cut and spliced. This implementation more or less follows
# Vugranam C. Sreedhar, Guang R Gao, and Yong-Fong Lee; "Incremental
# Computation of Dominator Trees." Proc. ACM SIGPLAN Workshop on
# Intermediate Representations. San Francisco, Calif.:ACM, 1995,
# pp. 1-12.
#
# You have been warned.
#
#-----------------------------------------------------------------------------

# quadcode::transformer method insertSplitMarkers --
#
#	Inserts markers into the code indicating the original basic
#	blocks prior to any node splitting.
#
# Results:
#	None.
#
# Side effects:
#	Every basic block has a 'split' instruction added at its head.
#	The 'split' has no result, and its sole source operand is a literal
#	giving the original basic block number.

oo::define quadcode::transformer method insertSplitMarkers {} {

    my variable bbcontent
    set b -1
    foreach bb $bbcontent {
	incr b
	lset bbcontent $b {}
	set pc -1
	foreach q $bb {
	    incr pc
	    if {[lindex $q 0 0] ni {"entry" "phi" "param"}} {
		break
	    }
	}
	lset bbcontent $b [linsert $bb[set bb ""] $pc \
			       [list split {} [list literal $b]]]
    }

    return
}

# quadcode::transformer method countSplits --
#
#	Walks through the quadcode, counting the number of split markers
#	present for each original location in the code.
#
# Parameters:
#	None.
#
# Results:
#	Returns a list, indexed by the original basic block number,
#	where the values are the number of copies of the original block that
#	are present. Zero counts are possible and expected if the original
#	code has been optimized away entirely.

oo::define quadcode::transformer method countSplits {} {

    my variable bbcontent
    set r {}
    set b -1
    foreach bb $bbcontent {
	incr b
	foreach q $bb {
	    if {[lindex $q 0 0] eq "split"} {
		set origb [lindex $q 2 1]
		while {$origb >= [llength $r]} {
		    lappend r 0
		}
		set n [lindex $r $origb]
		incr n
		lset r $origb $n
	    }
	}
    }

    return $r
}

# quadcode::transformer method removeSplitMarkers --
#
#	Removes markers that indicate the source of code when code splitting
#
# Results:
#	None
#
# Side effects:
#	Split markers are removed.

oo::define quadcode::transformer method removeSplitMarkers {} {

    my variable bbcontent
    
    set b -1
    foreach bb $bbcontent {
	incr b
	set newbb {}
	foreach q $bb {
	    if {[lindex $q 0] ne "split"} {
		lappend newbb $q
	    }
	}
	lset bbcontent $b $newbb
    }

    return
}

# quadcode::transformer method nodesplit --
#
#	Attempts to factor type checking out of loops by splitting a
#	single node of the flow graph.
#
# Results:
#	Return 1 if a node was split, 0 if no splittable node was found.
#
# Side effects:
#	Major surgery on the program, that requires cleanup optimization
#	and type inference (including possible repairs to interprocedural
#	type inference).

oo::define quadcode::transformer method nodesplit {} {

    #    global splitcount
    #    if {[incr splitcount] > 10} {puts "$splitcount splits"; return 0}

    my variable ns_counters

    # Determine how many copies of each original basic block are present

    set ns_counters [my countSplits]

    my debug-nodesplit {
	puts "Before node splitting:"
	my dump-bb
    }

    # Walk the basic blocks in order, looking for things that could
    # be improved by splitting

    foreach b [my bborder] {
	if {[my ns_splittable $b]} {

	    # Split a single basic block

	    my ns_cloneBB $b
	    my debug-nodesplit {
		puts "After splitting:"
		my dump-bb
	    }
	    my audit-phis "one split"
	    return 1
	}
    }

    # Found nothing to split

    return 0
}

# quadcode::transformer method ns_splittable --
#
#	Determines whether a basic block is a candidate for splitting
#
# Parameters:
#	b - Basic block number of the block being examined
#
# Results:
#	Returns 1 if the block should be split, 0 otherwise.
#
# A basic block may be split if it contains a phi where 'substantially
# different' types arrive, and it does not begin with an indication that
# it has been split already.

oo::define quadcode::transformer method ns_splittable {b} {

    my variable bbcontent
    my variable ns_counters
    my variable types
    
    set bbSplitLimit 8

    namespace upvar ::quadcode::dataType \
	EMPTY EMPTY IMPURE IMPURE NUMERIC NUMERIC OTHERSTRING OTHERSTRING \
							       BOOLEAN BOOLEAN

    set bb [lindex $bbcontent $b]
    set pc -1
    foreach q $bb {
	incr pc
	if {[lindex $q 0] ne "phi"} {

	    # We've come to the end of the phi operations, without finding
	    # a phi that would make the block splittable.
	    return 0
	}
		
	# $q is a phi operation in the block. Test whether the types of its
	# args are substantially different.

	set v1 [lindex $q 3]
	set t1 [typeOfOperand $types $v1]

	if {$t1 == 0} {
	    # Sanity check - die on mislinked variables
	    error "NOTHING ends up in $v1 at $b:$pc: $q"
	}

	set splitThisPhi 0
	foreach {origin v2} [lrange $q 4 end] {
	    set splitThisPhi 0
	    set t2 [typeOfOperand $types $v2]
	    
	    if {$t2 == 0} {
		# Sanity check - die on mislinked variables
		error "NOTHING ends up in $v2 at $b:$pc: $q"
	    }
	    
	    # Split if one value is IMPURE, EMPTY or OTHERSTRING,
	    # while the other value is none of the above.
	    set expensive1 \
		[expr {($t1 & ($IMPURE | $EMPTY | $OTHERSTRING)) != 0}]
	    set expensive2 \
		[expr {($t2 & ($IMPURE | $EMPTY | $OTHERSTRING)) != 0}]
	    if {$expensive1 ^ $expensive2} {
		set splitThisPhi 1
		break
	    }

	    # Split if one value is OTHERSTRING and the other is
	    # some sort of IMPURE NUMERIC or IMPURE BOOLEAN
	    if {($t1 & $OTHERSTRING)
		&& [::quadcode::dataType::isa $t2 \
			[expr {$IMPURE | $NUMERIC | $BOOLEAN}]]} {
		set splitThisPhi 1
		break
	    }
	    
	    if {($t2 & $OTHERSTRING)
		&& [::quadcode::dataType::isa $t1 \
			[expr {$IMPURE | $NUMERIC | $BOOLEAN}]]} {
		set splitThisPhi 1
		break
	    }
	}

	if {$splitThisPhi} {

	    set phi $q
	    my debug-nodesplit {
		puts "$b:$pc: $q"
		puts [format "\tmight be a candidate for node splitting\
	                      %s = %#llx %s" \
			  $v1 $t1 [nameOfType $t1]]
		if {$t1 < 0} {
		    puts "       ~[nameOfType [expr {~$t1}]]"
		}
		puts [format "%s = %#llx %s" \
			  $v2 $t2 [nameOfType $t2]]
		if {$t2 < 0} {
		    puts "       ~[nameOfType [expr {~$t2}]]"
		}
	    }
	    # We will add one more instance of the block for every
	    # predecessor, but delete one for the current instance
	    set delta [expr {[llength $phi]/2 - 2}]
	    set PC -1

	    # Make sure that the block hasn't been split too often already
	    while {[incr pc] < [llength $bb]} {
		set q [lindex $bb $pc]
		if {[lindex $q 1 0] eq "bb"} {
		    break
		}
		if {[lindex $q 0] eq "split"} {
		    set splitFrom [lindex $q 2 1]
		    set count [lindex $ns_counters $splitFrom]
		    if {$count + $delta > $bbSplitLimit} {
			my debug-nodesplit {
			    puts "$b:$pc: $splitFrom has been split too many\
                                  times already."
			}
			return 0
		    }
		}
	    }

	    my debug-nodesplit {
		puts "split block $b: [llength $bb] quads and\
                              [expr {([llength $phi] - 1)/2}] predecessors"
	    }
	    return 1
	    
	} else {
	    my debug-nodesplit {
		puts "$b:$pc: $q"
		puts "\tis not a candidate for node splitting"
		puts "   $v1 = $t1 [nameOfType $t1]"
		if {$t1 < 0} {
		    puts "       ~[nameOfType [expr {~$t1}]]"
		}
		puts "   $v2 = $t2 [nameOfType $t2]"
		if {$t2 < 0} {
		    puts "       ~[nameOfType [expr {~$t2}]]"
		}
	    }
	}
    }

    error "fell off the end of a basic block"

}
   
# quadcode::transformer method ns_cloneBB --
#
#	Clones a basic block that has been identified as a candidate
#	for splitting.
#
# Parameters:
#	b - Basic block number of the block being cloned
#
# Results:
#	None.
#
# Side effects:
#	Copies the block onto the end of its successors, and adds
#	edges, blocks for critical edge splitting, and phi nodes as
#	necessary to have the code in two places instead of one.
#
# The block must be multiple entry, therefore all its predecessors
# are single exit. That's the key concept that makes this splitting
# possible - that we can hoist the code up into the predecessors.

oo::define quadcode::transformer method ns_cloneBB {b} {

    my variable bbcontent
    my variable bbpred

    my debug-nodesplit {
	puts "Cloning and eliminating basic block $b:"
	set bb [lindex $bbcontent $b]
	set pc -1
	foreach q $bb {
	    puts "  $b:[incr pc]: $q"
	}
	puts "------------------------------"
    }

    # dups is a three-level dictionary. The first key is the name of
    # a variable whose assignment has been duplicated. The second-level
    # key is the number of the basic block where it now appears, and
    # the content is the number of assignments to the variable that appear
    # in the block.

    set dups {}

    # Copy the block's instructions into its predecessors. Split critical
    # edges if the block has more than one exit.

    set ps [lindex $bbpred $b]
    dict for {pred -} $ps {
	my ns_rewriteBlockOntoPred dups $b $pred
    }

    # If the block had multiple successors, its successors are all
    # single entry and free of phi nodes. If it had a single successor,
    # then any phi's in the successor need to have it removed and the 
    # clones inserted.

    set ss [my bbsucc $b]
    if {[llength $ss] eq 1} {
	my ns_fixupPhis [lindex $ss 0] $b $ps
    }

    # Unlink du-chains from the split block, and remove its
    # predecessor and successor links. Fixup bbidom, bbkids, and bblevel
    # to reflect the new state of the DJ graph. 

    my ns_destroyBB $b
    my bbidom
    my bblevel

    # Repair SSA

    my debug-nodesplit {
	my variable duchain
	puts "Repair SSA after node splitting"
	dict for {v defs} $dups {
	    puts "   $v: [dict keys $defs] -> [dict keys [dict get $duchain $v]]"
	}
	my dump-bb
    }

    dict for {v defs} $dups {
	my repairSSAVariable $v $defs
    }

}

# quadcode::transformer method ns_rewriteBlockOnPred --
#
#	Copies the instructions of the body of a basic block
#	(which must be two-entry) onto the end of a predecessor,
#	renaming all assigned variables.
#
# Parameters:
#	dupsv - Name of a variable in the callers scope that holds a two-level
#               dictionary tracking the duplicated assignment statements. The
#	        first-level keys are basic block numbers; the second-level
#	        keys are basic blocks where the variables are assigned, and
#	        the values are counts of assignments within the blocks.
#	b - Number of the basic block to copy
#	pred - Number of the predecessor block
#
# Results:
#	None.
#
# Side effects:
#	Copies the code, and updates du-chains of variables appearing in
#	instructions being copied. Updates 'dupsv' with the locations of
#	variable definitions in the copies. Splits critical edges if
#	they are being introduced.

oo::define quadcode::transformer method ns_rewriteBlockOntoPred {dupsv b pred} {

    upvar 1 $dupsv dups

    my variable bbcontent
    my variable bbpred

    # Look up the code we're copying and the code that we're appending to.

    set bb [lindex $bbcontent $b]
    set pb [lindex $bbcontent $pred]
    lset bbcontent $pred {}
    set pb [lreplace $pb[set pb {}] end end]

    my debug-nodesplit {
    	puts "Want to copy basic block $b into predecessor $pred"
    	set pc -1
    	foreach q $bb {
    	    puts "    $b:[incr pc]: $q"
    	}
    	puts " ------- "
    	set pc -1
    	foreach q $pb {
    	    puts "    $pred:[incr pc]: $q"
    	}
    	puts " +++++++ "
    }


    # Iterate through the instructions being copied
    set key [list bb $pred]
    set pc -1
    set critical 0
    foreach q $bb {
	incr pc

	set res [lindex $q 1]

	# If we're copying a phi, replace it with a copy from the appropriate
	# source variable.
	set op [lindex $q 0 0]
	switch -exact -- $op {
	    phi {
		set repvar [dict get $q $key]
		if {$repvar eq "Nothing"} {
		    set q [list unset $res]
		} else {
		    set q [list copy $res $repvar]
		}
	    }
	}

	# Examine the result of the operation. If the result is a value,
	# add the copy to the dictionary of multiple assignments that will
	# need SSA repair. If the operation is a conditional jump, peform
	# critical edge splitting. On any jump, update the 'bbpred' link.
	switch -exact -- [lindex $res 0] {
	    "temp" - "var" {
		if {[dict exists $dups $res]} {
		    set d [dict get $dups $res]
		    dict set dups $res {}
		} else {
		    set d {}
		}
		dict incr d $pred
		dict set dups $res $d
	    }
	    "bb" {
		if {[lindex $q 0] ne "jump"} {
		    set critical 1
		}
		if {$critical} {
		    set tgt [my makeEmptyBB [lindex $res 1]]
		    my debug-nodesplit {
			puts "       (* $tgt:0: [list jump $res] *)"
		    }
		    set res [list bb $tgt]
		    lset q 1 $res
		}
		my bblink $pred [lindex $res 1]
	    }
	}
	
	# Add du-chaining for the variable references in the copied code
	foreach opd [lrange $q 2 end] {
	    if {[lindex $opd 0] in {"temp" "var"}} {
		my addUse $opd $pred
	    }
	}

	# Copy the quad to its new home
	my debug-nodesplit {
	    puts "    $pred:[llength $pb]: $q"
	}
	lappend pb $q
    }

    lset bbcontent $pred $pb
    return
}

# ns_fixupPhis --
#
#	Repairs phi operations in a successor block after splitting a
#	basic block.
#
# Parameters:
#	b - Successor block
#	orig - Original block that was split
#	clones - Blocks into which the code was cloned.
#
# Results:
#	None.
#
# Side effects:
#	Phi operations are rewritten

oo::define quadcode::transformer method ns_fixupPhis {b orig clones} {

    my variable bbcontent

    my debug-nodesplit {
	puts "  redirect phi operations in $b that refer to $orig to use\
	        [dict keys $clones] instead"
    }
    
    set bb [lindex $bbcontent $b]
    lset bbcontent $b {}
    set key [list bb $orig]
    set repls [lmap {c -} $clones {list bb $c}]
    set pc -1
    foreach q $bb {
	if {[lindex $q 0] ne "phi"} {
	    break
	}
	incr pc
	set newq {}
	dict for {from fromvar} $q {
	    if {$from ne $key} {
		lappend newq $from $fromvar
	    } else {
		foreach newfrom $repls {
		    lappend newq $newfrom $fromvar
		}
	    }
	}
	my debug-nodesplit {
	    puts "    $b:$pc: redirected: $newq"
	}
	lset bb $pc $newq
    }
    lset bbcontent $b $bb
	    
    return
}

# ns_destroyBB --
#
#	Destroys a basic block after node splitting has replaced it with
#	one or more new instances.
#
# Parameters:
#	b - Basic block number to be destroyed
#
# Results:
#	None.
#
# Side effects:
#	All values defined in the block have their du-chains unlinked.
#	The block is erased from 'bbcontent' and 'bbpred'.
#
# The next time that 'deadcode' runs, the last vestiges of the block,
# which is now unreachable, will be eliminated entirely.

oo::define quadcode::transformer method ns_destroyBB {b} {

    my variable bbcontent
    my variable bbpred

    # Unlink du-chains
    set bb [lindex $bbcontent $b]
    set key [list bb $b]
    foreach q $bb {
	foreach opd [lrange $q 2 end] {
	    if {[lindex $opd 0] in {"var" "temp"}} {
		my removeUse $opd $b
	    }
	}
    }

    # Unlink predecessor relationships
    foreach s [my bbsucc $b] {
	set ps [lindex $bbpred $s]
	lset bbpred $s {}
	dict unset ps $b
	lset bbpred $s $ps
    }

    # Remove the block from 'bbpred' and 'bbcontent'
    set preds [lindex $bbpred $b]
    lset bbpred $b {}
    lset bbcontent $b {}

}