#include "codegen.h"
using namespace goose;
using namespace goose::eir;
// Mangling scheme:
//
// Functions, types and everything are uniquely identified by IR expressions
// (built out of eir::Term objects) called Identities.
// The mangling scheme is therefore a lossless text serialization of such an identity,
// with a simple compression scheme to make it shorter.
//
// All integers serialized as part of this scheme are in hex.
//
// Each element is introduced by one character. Hex digits (0-9, a-f) can't be used as
// intro characters.
//
// Literal elements:
// i: integer, followed by its value
// S: string, followed by its length, followed by ':', followed by the string itself.
// n: anonymous StringId, followed by its unique id
// s: StringId, followed by its length, followed by ':', followed by the string itself.
// _ followed by an hex number: vector, whose length is that number,
// followed by its contained elements, serialized recursively.
// @ same as above, but the vector is of variable length and is followed by the repetition element.
// p, P: a void* or ptr< void > term (unfortunately they have legitimate uses in some type identitiers
// that we do need to be able to mangle)
// x: an eir term placeholder.
//
// $: compressed string id: it is followed by the 0 based index of a previously encountered
// StringId literal, in the order they were encountered.
//
// The letters V, o, v, t, T, q, u, r, R, n, l:
// respectively the StringIDs: Value, Constant, void, type, rt_type,
// quote, func, param, predicates, anykind, bool.
// (those are super recurrent StringIDs in every function identity, so they have dedicated
// symbols to make things shorter. Note that none of those can be the letter a to f)
//
// Some types of Terms (wildcards such as AnyTerm, opaque pointer to c++ objects) are not meant
// to appear in an identity expression and will cause mangling to fail.
// TODO: this mangling scheme is ok at this stage of development as it does perform the only thing
// we really need at the moment, which is to make sure that function with distinct identities have a
// distinct mangling.
// However too much unrelated stuff ends up in the mangling, which is bad. It tends to expose
// the internals of type implementation a bit too much - changes that should be irrelevant to binary
// compatibility might break it.
// This should be replaced with a pattern based rule system, much like the type checking rules,
// so that we can still have this as default but a specific handling of higher level constructs
// such as values (to remove the location id), function types (to get rid of the param pattern),
// etc.
// Alternatively maybe we should be more clever when building identities and avoid including those
// unecessary parts. This would require more intelligence to build an identity, although it might
// not necessarily be more complicated than the rule system idea above.
namespace
{
class Mangler
{
public:
Mangler()
{
m_mangled << hex;
}
bool mangle( const Term& t )
{
return visit( [&]( auto&& t )
{
return mangle( t );
}, t );
}
bool mangle( uint32_t integer )
{
m_mangled << 'i' << integer;
return true;
}
// Locations shouldn't be included in the mangling, but
// should not cause it to fail either.
bool mangle( LocationId loc )
{
return true;
}
bool mangle( const string& str )
{
m_mangled << 'S' << str.size() << ':' << str;
return true;
}
bool mangle( const ptr< void >& pv )
{
m_mangled << 'P';
return true;
}
bool mangle( void* pv )
{
m_mangled << 'p';
return true;
}
bool mangle( StringId strId )
{
// Some hard coded abbreviations for super recurrent stuff that ends up being present every time.
// Not strictly necessary, but makes for shorter symbols.
if( strId == "value"_sid )
{
m_mangled << 'V';
return true;
}
if( strId == "constant"_sid )
{
// Use o because we can't use a letter that also counts as an hex digit
// if we want to be able to unmangle it
m_mangled << 'o';
return true;
}
if( strId == "void"_sid )
{
m_mangled << 'v';
return true;
}
if( strId == "type"_sid )
{
m_mangled << 't';
return true;
}
if( strId == "rt_type"_sid )
{
m_mangled << 'T';
return true;
}
if( strId == "quote"_sid )
{
m_mangled << 'q';
return true;
}
if( strId == "func"_sid )
{
m_mangled << 'u';
return true;
}
if( strId == "param"_sid )
{
m_mangled << 'r';
return true;
}
if( strId == "predicates"_sid )
{
m_mangled << 'R';
return true;
}
if( strId == "anykind"_sid )
{
m_mangled << 'n';
return true;
}
if( strId == "bool"_sid )
{
m_mangled << 'l';
return true;
}
// Generic compression scheme to avoid storing the same stringId
// literals multiple times.
if( auto it = m_dictionary.find( strId ); it != m_dictionary.end() )
{
m_mangled << '$' << it->second;
return true;
}
m_dictionary.emplace( strId, m_dictionary.size() );
m_mangled << 's' << strId.str().size();
if( strId.isNumerical() )
m_mangled << '#' << strId.id() << '#';
else
m_mangled << ':' << strId.str();
return true;
}
bool mangle( const pvec& vec )
{
if( vec->repetitionTerm() )
m_mangled << '@';
else
m_mangled << '_';
m_mangled << vec->terms().size();
for( auto&& t : vec->terms() )
{
if( !mangle( t ) )
return false;
}
if( vec->repetitionTerm() && !mangle( *vec->repetitionTerm() ) )
return false;
return true;
}
bool mangle( const AnyTerm& a )
{
m_mangled << 'x';
return true;
}
bool mangle( const Hole& h )
{
m_mangled << 'h';
mangle( h.name() );
mangle( h.kind() );
return true;
}
template< typename T >
bool mangle( const T& )
{
return false;
}
auto mangled() const
{
return m_mangled.str();
}
private:
stringstream m_mangled;
unordered_map< StringId, size_t > m_dictionary;
};
}
namespace goose::codegen
{
optional< string > Mangle( const Term& identity )
{
Mangler m;
if( !m.mangle( identity ) )
return nullopt;
return m.mangled();
}
}