REM M4BASIC - utf8.m4b
REM UTF encoder - encode characters like ë, ô, í to UTF-8.
REM http://en.wikipedia.org/wiki/UTF-8
REM
REM Taken from the utf8 example of BaCon.
REM PvE, August 2014 - GPL.

REM Working variables
REM   arg - the input string taken from the command line
REM   new - the output string built up by the conversion loop
STRING arg, new
REM   c - numeric value of the character being examined
REM   t - position in arg used by the conversion loop
REM   x - position used by the dump loops
REM   b1 and b2 - the two output bytes produced for a character above 127
INTEGER c, t, x, b1, b2

REM Exactly one argument is expected. Presumably the argument count includes
REM the program name, so any count other than 2 prints the usage text and stops
REM with status 1 - to be confirmed against the dialect documentation.
IF argc != 2 THEN
    PRINT "Usage: utf8 <string>" NL
    END 1
ENDIF

REM The string to convert is the first argument
SET arg = argv[1]

REM Show the input three ways - decimal values, hex values, then the raw text

REM Pass 1 - one decimal value per character, each followed by a space
NL
PRINT "ASCII values decimal: "
LET x = 1
WHILE x <= LEN(arg) DO
    LET c = ASC(MID(arg, x, 1))
    PRINT STR(c)
    PRINT " "
    INCR x
WEND

REM Pass 2 - the same values in hex
NL
PRINT "ASCII values hex: "
LET x = 1
WHILE x <= LEN(arg) DO
    LET c = ASC(MID(arg, x, 1))
    PRINT HEX(c)
    PRINT " "
    INCR x
WEND

REM Finally the input string itself
NL
PRINT "ASCII string: "
PRINT arg

REM Start the scan at the first character of the input
LET t = 1

REM Build the UTF-8 output in new. Every character value above 127 is written
REM as a two-byte sequence, every other character is copied unchanged.
REM NOTE for review - the arithmetic below is only correct for values from
REM 128 to 255, confirm that the character value function of this dialect
REM never yields anything outside 0 to 255.
WHILE t <= LEN(arg) DO

    REM Numeric value of the character at position t
    LET c = ASC(MID(arg, t, 1))

    IF c > 127 THEN
        REM Lead byte, written first - keep the top two bits of c by masking with 192,
        REM shift them 6 positions to the right, then add 192 which is binary 11000000
        REM and marks the start of a two-byte sequence
        LET b1 = ((c & 192) >> 6) + 192

        REM Continuation byte, written second - keep the low six bits of c by masking
        REM with 63 which is binary 00111111, then add 128 which is binary 10000000
        LET b2 = (c & 63) + 128

        REM Append the lead byte followed by the continuation byte
        SET new = CONCAT(new, CHR(b1))
        SET new = CONCAT(new, CHR(b2))
    ELSE
        REM Values up to 127 are a single identical byte in UTF-8, so copy the character as is
        SET new = CONCAT(new, MID(arg, t, 1))
    ENDIF

    REM Move on to the next input character
    INCR t
WEND

REM Show the converted string three ways - decimal values, hex values, then the raw text

REM Pass 1 - one decimal value per output byte, each followed by a space
NL
PRINT "UTF-8 values decimal: "
LET x = 1
WHILE x <= LEN(new) DO
    LET c = ASC(MID(new, x, 1))
    PRINT STR(c)
    PRINT " "
    INCR x
WEND

REM Pass 2 - the same values in hex
NL
PRINT "UTF-8 values hex: "
LET x = 1
WHILE x <= LEN(new) DO
    LET c = ASC(MID(new, x, 1))
    PRINT HEX(c)
    PRINT " "
    INCR x
WEND

REM Finally the converted string itself, then a closing newline
NL
PRINT "UTF-8 string: "
PRINT new
NL

REM End of utf8.m4b