REM UTF encoder - encode characters like ë, ô, í to UTF-8.
REM http://en.wikipedia.org/wiki/UTF-8
REM
REM Taken from the utf8 example of BaCon.
REM PvE, August 2014 - GPL.
REM
REM Takes exactly one command line argument, dumps its byte values,
REM re-encodes every byte above 127 as a two-byte UTF-8 sequence and
REM then dumps the byte values of the converted text.

STRING arg, out
INTEGER code, pos, lead, cont

REM Exactly one argument is required, otherwise show usage and stop with status 1
IF argc != 2 THEN
    PRINT "Usage: utf8 <string>"
    NL
    END 1
ENDIF

SET arg = argv[1]

REM Dump the input: decimal values, hex values, then the text itself
NL
PRINT "ASCII values decimal: "
FOR pos = 1 TO LEN(arg)
    LET code = ASC(MID(arg, pos, 1))
    PRINT STR(code)
    PRINT " "
NEXT

NL
PRINT "ASCII values hex: "
FOR pos = 1 TO LEN(arg)
    LET code = ASC(MID(arg, pos, 1))
    PRINT HEX(code)
    PRINT " "
NEXT

NL
PRINT "ASCII string: "
PRINT arg

REM Convert the input one character at a time
FOR pos = 1 TO LEN(arg)
    LET code = ASC(MID(arg, pos, 1))
    IF code <= 127 THEN
        REM Plain 7-bit character, copied unchanged
        SET out = CONCAT(out, MID(arg, pos, 1))
    ELSE
        REM Lead byte 110xxxxx: keep the top two bits, move them down six places, add 192
        LET lead = 192 + ((code & 192) >> 6)
        REM Continuation byte 10xxxxxx: keep the low six bits, add 128
        LET cont = 128 + (code & 63)
        REM Append the lead byte first, then the continuation byte
        SET out = CONCAT(out, CHR(lead))
        SET out = CONCAT(out, CHR(cont))
    ENDIF
NEXT

REM Dump the result: decimal values, hex values, then the text itself
NL
PRINT "UTF-8 values decimal: "
FOR pos = 1 TO LEN(out)
    LET code = ASC(MID(out, pos, 1))
    PRINT STR(code)
    PRINT " "
NEXT

NL
PRINT "UTF-8 values hex: "
FOR pos = 1 TO LEN(out)
    LET code = ASC(MID(out, pos, 1))
    PRINT HEX(code)
    PRINT " "
NEXT

NL
PRINT "UTF-8 string: "
PRINT out
NL
Return to M4BASIC