Goose  Artifact [0accef60fd]

Artifact 0accef60fd383a01620b00aaafd2d8275a43307f15f35f5703d08fd4273baed3:

  • File bs/builtins/types/runtime/unify.cpp — part of check-in [18e3bfa195] at 2019-08-03 23:38:56 on branch trunk — Fixed the ct_int unification rule, which didn't work correctly against generic function parameters. (user: achavasse size: 7838)

#include "builtins/builtins.h"

using namespace empathy;
using namespace empathy::ir;

namespace empathy::builtins
{
    // Registers, in the environment's unification rule set, the rules that
    // let compile-time constants unify with runtime types:
    //   - the ct_int type against any rt_integer type,
    //   - a ct_int constant against any value pattern (deferred range check
    //     and emission of a load-const-int instruction),
    //   - a ct_string constant against a pointer to an 8-bit rt_integer
    //     (emission of a load-const-string instruction).
    void SetupRuntimeTypesUnification( Env& e )
    {
        auto rtIntTypePattern = Value( TypeType(), TVEC( TSID( rt_type ),
            ANYTERM( _ ),
            TSID( rt_integer ), ANYTERM( _ ), ANYTERM( _ ) ) );

        // ct_int type against a RTInteger type:
        // return the RTInteger type. We don't care if the
        // ct_int fits at this point, this will be dealt with by
        // the ct_int value unification rule below.
        e.unificationRuleSet()->addSymRule(
            GetValueType< APSInt >(),
            ValueToIRExpr( rtIntTypePattern ),
        []( const Term&, const Term& rhs, UnificationContext& c ) -> UniGen
        {
            co_yield { rhs, c };
        } );

        // ct_int constant unification against any other value:
        // Unify the value's type with ct_int, and if successful
        // (as per the rule of ct_int versus rtint above), wrap
        // the result in a callback.
        // That callback will check that the final unified value is
        // of an RTInt type of suitable size. If it is, it will emit
        // a load const int instruction.
        // Otherwise, it will fail the unification.
        //
        // The reason we do all this is because we have to resolve the
        // type unification first. It is only after this is done (and any
        // hole is replaced with its inferred value) that we know the actual
        // wanted type for the value. We need to know that type to know if
        // our constant value can be converted, and to emit the conversion.
        //
        // So we do this in a callback, which lets us perform a check and
        // substitution of the final value after any contained hole has been
        // replaced, and unification callback has been applied.
        //
        // An example of a situation where this is needed is as follows:
        //
        // void lomarf( $T a, $T b ) {}
        // RTUint( 8 ) someInt;
        // lomarf( someInt, 123 );
        //
        // $T will resolve to RTUInt( 8 ), but if we had a simple ct_int against
        // RTUInt param unification rule, the rule wouldn't work, because before we
        // apply substitutions, the second parameter is still expressed as $T b,
        // which would match the rule.
        // However, the ct_int against "value with placeholder type and content" will
        // match against $T b, and wrap it with a callback. Substitute will then
        // turn $T b into RTUInt( 8 ) b and then invoke the callback on that, which
        // can do its final checks and emit the load const int instruction wrapped as
        // a value, or return nullopt to indicate that the unification failed (in case
        // the constant is too big to fit in the RTUInt( 8 )).
        e.unificationRuleSet()->addAsymRule(

            ValueToIRExpr( ValuePattern(
                ANYTERM( _ ), ANYTERM( _ ), ANYTERM( _ ) ) ),

            ValueToIRExpr( Value(
                GetValueType< APSInt >(),
                ANYTERM( _ ) ) ),

        []( const Term& lhs, const Term& rhs, UnificationContext& c ) -> UniGen
        {
            // Unify the type
            auto valPat = ValuePatternFromIRExpr( lhs );
            if( !valPat )
                co_return;

            for( auto&& [s, uc] : Unify( valPat->type(), GetValueType< APSInt >(), c ) )
            {
                // When we match through this rule rather than through the default rule,
                // we don't count the vector that contains the value itself, so add its complexity manually
                // here.
                // Otherwise it can yield incorrect results and incorrect overloads ordering.
                //
                // For instance, in this case, given
                //   bool equals( $a, $a ) { return true }
                //   bool equals( $a, $b ) { return false }
                // equals( 219, 219 ) would return false, because the unification of $b against $b
                // invokes this rule, whereas the two separate unifications of 219 against a different
                // hole would each yield a higher result. The complexity score being higher, the
                // additional unique hole for the second overload would be ignored in the ranking.
                uc.incComplexity( 2 );

                // Yield a value of the unified type, wrapped into a callback
                // that will perform the size check and emit the load const once
                // we know the final unified solution for the value's type.
                // Also, the callback should just return the final value as is if
                // it happens to be a ct_int.
                //
                // The unified type is written into a per-iteration copy of the
                // pattern: the Unify generator above is still live and was handed
                // valPat->type(), so that term must not be overwritten while we
                // are still pulling solutions from it.
                auto unifiedPat = *valPat;
                unifiedPat.type() = s;

                auto wrapped = WrapWithCallback( ValueToIRExpr( unifiedPat ),
                    [rhs]( auto&& t ) -> optional< Term >
                    {
                        auto lhsVal = ValuePatternFromIRExpr( t );
                        if( !lhsVal )
                            return nullopt;

                        if( lhsVal->type() == GetValueType< APSInt >() )
                            return rhs;

                        auto rhsVal = ValueFromIRExpr( rhs );
                        if( !rhsVal )
                            return nullopt;

                        auto ct = FromValue< APSInt >( *rhsVal );
                        if( !ct )
                            return nullopt;

                        // After substitution the type may still not be an actual
                        // value (or not an RTInteger): fail the unification then.
                        auto rttypeVal = ValueFromIRExpr( lhsVal->type() );
                        if( !rttypeVal )
                            return nullopt;

                        auto rttype = FromValue< RTInteger >( *rttypeVal );
                        if( !rttype )
                            return nullopt;

                        // The range checks below guarantee that the constant is
                        // representable in m_numBits bits, so the *OrTrunc variants
                        // never lose information. They are needed because plain
                        // sext/zext assert when the constant's storage width is not
                        // strictly smaller than the target width.
                        APSInt valToLoad;

                        if( rttype->m_signed )
                        {
                            if( ct->getMinSignedBits() > rttype->m_numBits )
                                return nullopt;

                            if( ct->isNegative() )
                                valToLoad = ct->sextOrTrunc( rttype->m_numBits );
                            else
                                valToLoad = ct->zextOrTrunc( rttype->m_numBits );
                        }
                        else
                        {
                            if( ct->isNegative() )
                                return nullopt;

                            if( ct->getActiveBits() > rttype->m_numBits )
                                return nullopt;

                            valToLoad = ct->zextOrTrunc( rttype->m_numBits );
                        }

                        auto* llvmType = static_cast< llvm::IntegerType* >( GetLLVMType( *rttypeVal ) );
                        return ValueToIRExpr(
                            BuildComputedValue( lhsVal->type(), llr::LoadConstInt( llvmType, move( valToLoad ) ) ) );
                    }
                );

                co_yield { move( wrapped ), uc };
            }
        } );

        auto rtInt8TypePattern = Value( TypeType(), TVEC( TSID( rt_type ),
            ANYTERM( _ ),
            TSID( rt_integer ), TERM( 8ULL ), ANYTERM( _ ) ) );

        auto rtInt8PtrTypePattern = Value( TypeType(), TVEC( TSID( rt_type ),
            ANYTERM( _ ),
            TSID( rt_pointer ), ValueToIRExpr( rtInt8TypePattern ) ) );

        // ct_string constant unification against a rt_pointer to a rt_integer( 8 ):
        // Emit a LoadConstantStr llr instruction.
        e.unificationRuleSet()->addSymRule(

            ValueToIRExpr( Value(
                GetValueType< string >(),
                ANYTERM( _ ) ) ),

            ValueToIRExpr( ValuePattern(
                ANYTERM( _ ),
                ValueToIRExpr( rtInt8PtrTypePattern ),
                ANYTERM( _ ) ) ),

        []( const Term& lhs, const Term& rhs, UnificationContext& c ) -> UniGen
        {
            // Yield no solution rather than dereferencing an empty result
            // if either side cannot be decoded.
            auto lhsVal = ValueFromIRExpr( lhs );
            if( !lhsVal )
                co_return;

            auto str = FromValue< string >( *lhsVal );
            if( !str )
                co_return;

            auto rhsVal = ValuePatternFromIRExpr( rhs );
            if( !rhsVal )
                co_return;

            co_yield { ValueToIRExpr(
                BuildComputedValue( rhsVal->type(), llr::LoadConstStr( *str ) ) ), c };
        } );
    }
}