74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# Malformed UTF-8 fixtures.  Each file name spells out the offending byte
# sequence in hex, and the file content must contain exactly those bytes.
# Lead byte FE followed by six continuation bytes (the "36 bit" form).
write_file utf-bad-fe-80-80-80-80-80-80 "36 bit NUL:\xFE\x80\x80\x80\x80\x80\x80 is bad\n"
# Two-byte overlong encodings of code points below U+0080.
write_file utf-bad-c0-81 "overlong SOH:\xC0\x81 is bad\n"
write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"
# Complete four-byte sequence for U+110000, one past the last code point.
write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80\x80 not unicode\n"
write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
write_file utf-bad-ff "no byte FF:\xFF\n"
# UTF-16 surrogate values encoded as three-byte UTF-8 sequences.
write_file utf-ill16-lead "lead surrogate:\xED\xA0\x80 is ill formed\n"
write_file utf-ill16-trail "trail surrogate:\xED\xB0\x80 is ill formed\n"
write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"
# Well-formed multi-byte sample: 2-, 3- and 4-byte sequences on two lines.
set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
# protOut is a harness helper defined elsewhere; it receives the sample text.
protOut $emoji
write_file utf-8-emoji $emoji
# make all the test files known to fossil, then test
fossil addremove
fossil test-commit-warning --no-settings -v
# Compare the normalized output of the preceding fossil command against the
# expected table.  Each expected row is "<flag>\t<file name>\t<warning text>";
# rows whose flag is 0 carry an empty warning text.  The table is written with
# literal \t escapes: [string trim] strips the surrounding newlines and
# [subst -nocommands -novariables] then expands only the backslash escapes.
# NOTE(review): the final bare "1" is presumably the command's overall result
# line -- confirm against the test-commit-warning output format.
test pre-commit-warnings-1 {[normalize_result] eq \
[subst -nocommands -novariables [string trim {
1\tbinary\tbinary data
1\tcr-lf-crlf.txt\tmixed line endings
1\tcr-only.txt\tCR line endings
1\tcrlf.txt\tCR/LF line endings
0\tempty\t
0\tline-0064\t
0\tline-1024\t
0\tline-4096\t
1\tline-64K\tlong lines
1\tline-8192\tlong lines
0\tplain.txt\t
0\tutf-8-emoji\t
1\tutf-bad-c0-81\tinvalid UTF-8
1\tutf-bad-c0-bf\tinvalid UTF-8
1\tutf-bad-c1-bf\tinvalid UTF-8
1\tutf-bad-e0-80-80\tinvalid UTF-8
1\tutf-bad-f0-80-80-80\tinvalid UTF-8
1\tutf-bad-f4-90-80-80\tinvalid UTF-8
1\tutf-bad-f8-80-80-80-80\tinvalid UTF-8
1\tutf-bad-f9-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fc-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fe-80-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-ff\tinvalid UTF-8
0\tutf-ill16-lead\t
0\tutf-ill16-pair\t
0\tutf-ill16-trail\t
0\tutf-mod-c0-80\t
1}]]}
###############################################################################
# TODO: Change to a collection of test-case crafted files
# rather than depend on this list of files that will
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# Malformed UTF-8 fixtures.  Each file name spells out the offending byte
# sequence in hex, and the file content must contain exactly those bytes.
# Lead byte FE followed by six continuation bytes (the "36 bit" form).
write_file utf-bad-fe-80-80-80-80-80-80 "36 bit NUL:\xFE\x80\x80\x80\x80\x80\x80 is bad\n"
# Two-byte overlong encodings of code points below U+0080.
write_file utf-bad-c0-81 "overlong SOH:\xC0\x81 is bad\n"
write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"
# Complete four-byte sequence for U+110000, one past the last code point.
write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80\x80 not unicode\n"
write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
write_file utf-bad-ff "no byte FF:\xFF\n"
# UTF-16 surrogate values encoded as three-byte UTF-8 sequences.
write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n"
write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n"
write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"
# Well-formed multi-byte sample: 2-, 3- and 4-byte sequences on two lines.
set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
# protOut is a harness helper defined elsewhere; it receives the sample text.
protOut $emoji
write_file utf-8-emoji $emoji
# Same sample preceded by the UTF-8 byte order mark EF BB BF.
write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji"
# UTF-16 uses 16-bit values to cover all valid Unicode code points
# from U+0 to U+10FFFF, using surrogate pairs to escape the BMP.
# Interchange requires knowing (and preserving) the byte order.
write_file utf-16le-hello "h\x00e\x00l\x00l\x00o\x00\n\x00"
write_file utf-16be-hello "\x00h\x00e\x00l\x00l\x00o\x00\n"
# Both BOM variables end with the letter "h" in addition to the two BOM bytes.
# NOTE(review): the *be-bom* files below therefore start with the BOM, an "h",
# and then "\x00h..." (a doubled "h" and an odd byte count).  The expected
# table in pre-commit-warnings-1 lists results for these exact bytes --
# confirm this is intentional before changing either variable.
set bomLE "\xff\xfeh"
set bomBE "\xfe\xffh"
write_file utf-16le-bomle-hello "$bomLE\x00e\x00l\x00l\x00o\x00\n\x00"
write_file utf-16be-bombe-hello "$bomBE\x00h\x00e\x00l\x00l\x00o\x00\n"
write_file utf-16le-bombe-hello "$bomBE\x00e\x00l\x00l\x00o\x00\n\x00"
write_file utf-16be-bomle-hello "$bomLE\x00h\x00e\x00l\x00l\x00o\x00\n"
# Copy the UTF-16 sample files from the test directory, then write variants
# with the first two bytes (string indices 0 and 1) dropped.
set le16 [read_file [file join $testdir utf16le.txt]]
set be16 [read_file [file join $testdir utf16be.txt]]
write_file utf-16le.txt $le16
write_file utf-16be.txt $be16
write_file utf-nobom-16le.txt [string range $le16 2 end]
write_file utf-nobom-16be.txt [string range $be16 2 end]
# Disabled variants that would write the same two files under $::env(TEMP).
#write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end]
#write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end]
# make all the test files known to fossil, then test
fossil addremove
fossil test-commit-warning --no-settings -v
# Compare the normalized output of the preceding fossil command against the
# expected table.  Each expected row is "<flag>\t<file name>\t<warning text>";
# rows whose flag is 0 carry an empty warning text.  The table is written with
# literal \t escapes: [string trim] strips the surrounding newlines and
# [subst -nocommands -novariables] then expands only the backslash escapes.
# NOTE(review): the final bare "1" is presumably the command's overall result
# line -- confirm against the test-commit-warning output format.
test pre-commit-warnings-1 {[normalize_result] eq \
[subst -nocommands -novariables [string trim {
1\tbinary\tbinary data
1\tcr-lf-crlf.txt\tmixed line endings
1\tcr-only.txt\tCR line endings
1\tcrlf.txt\tCR/LF line endings
0\tempty\t
0\tline-0064\t
0\tline-1024\t
0\tline-4096\t
1\tline-64K\tlong lines
1\tline-8192\tlong lines
0\tplain.txt\t
1\tutf-16be-bombe-hello\tbinary data
1\tutf-16be-bomle-hello\tbinary data
1\tutf-16be-hello\tbinary data
1\tutf-16be.txt\tUnicode
1\tutf-16le-bombe-hello\tUnicode
1\tutf-16le-bomle-hello\tUnicode
1\tutf-16le-hello\tbinary data
1\tutf-16le.txt\tUnicode
0\tutf-8-bom-emoji\t
0\tutf-8-emoji\t
1\tutf-bad-c0-81\tinvalid UTF-8
1\tutf-bad-c0-bf\tinvalid UTF-8
1\tutf-bad-c1-bf\tinvalid UTF-8
1\tutf-bad-e0-80-80\tinvalid UTF-8
1\tutf-bad-f0-80-80-80\tinvalid UTF-8
1\tutf-bad-f4-90-80-80\tinvalid UTF-8
1\tutf-bad-f8-80-80-80-80\tinvalid UTF-8
1\tutf-bad-f9-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fc-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fe-80-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-ff\tinvalid UTF-8
0\tutf-ill16-lead\t
0\tutf-ill16-pair\t
0\tutf-ill16-trail\t
0\tutf-mod-c0-80\t
1\tutf-nobom-16be.txt\tbinary data
1\tutf-nobom-16le.txt\tbinary data
1}]]}
###############################################################################
# TODO: Change to a collection of test-case crafted files
# rather than depend on this list of files that will
|