Changes On Branch 6e28faf65d6d02f1
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch unchained Through [6e28faf65d] Excluding Merge-Ins

This is equivalent to a diff from a52ac1d5fa to 6e28faf65d

2023-03-22
16:58
Merge trunk 64c58db9fc: Adapt 2 testcases (io-39.16/io-39.16a), showing that "-encoding" can be shor... check-in: 8043924d78 user: pooryorick tags: unchained
16:57
Merge trunk a52ac1d5fa: winFCmd-1.24 differs in error code on Win 11. check-in: 6e28faf65d user: pooryorick tags: unchained
16:56
Merge trunk 13b04d4d60: Add "notWsl" test constraints. Clean up many testcases. check-in: 7d960693cd user: pooryorick tags: unchained
2023-03-09
10:22
Merge 8.7 check-in: 64c58db9fc user: jan.nijtmans tags: trunk, main
02:52
Merge 8.7 - winFCmd-1.24 differs in error code on Win 11 check-in: a52ac1d5fa user: apnadkarni tags: trunk, main
02:50
Merge 8.6 - winFCmd-1.24 differs in error code on Win 11 check-in: 6e90502faf user: apnadkarni tags: core-8-branch
2023-03-08
20:16
Merge core-8-branch check-in: 13b04d4d60 user: jan.nijtmans tags: trunk, main

Changes to generic/tclUtf.c.
178
179
180
181
182
183
184
185
186


187
188
189
190
191
192
193
194

195
196
197


198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213




214
215
216



217
218
219
220
221
222
223
178
179
180
181
182
183
184


185
186
187
188
189
190
191
192
193

194



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209



210
211
212
213



214
215
216
217
218
219
220
221
222
223







-
-
+
+







-
+
-
-
-
+
+













-
-
-
+
+
+
+
-
-
-
+
+
+







}

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UniCharToUtf --
 *
 *	Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the provided
 *	buffer. Equivalent to Plan 9 runetochar().
 *
 *	Surrogate pairs are handled as follows: When ch is a high surrogate,
 *	the first byte of the 4-byte UTF-8 sequence is stored in the buffer and
 *	the function returns 1. If the function is called again with a low
 *	surrogate and the same buffer, the remaining 3 bytes of the 4-byte
 *	UTF-8 sequence are produced.
 *
 *	If no low surrogate follows the high surrogate (which is actually
 *	illegal), this can be handled reasonably by calling Tcl_UniCharToUtf
 *	again with ch being -1. This produces a 3-byte UTF-8 sequence
 *	representing the high surrogate.
 *
 * Results:
 *	Returns the number of bytes stored into the buffer.
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

#undef Tcl_UniCharToUtf
size_t
Tcl_UniCharToUtf(
    int ch,			/* The Tcl_UniChar to be stored in the
				 * buffer. Can be or'ed with flag TCL_COMBINE */
    char *buf)			/* Buffer in which the UTF-8 representation of
    int ch,	/* The Tcl_UniChar to be stored in the
		 * buffer. Can be or'ed with flag TCL_COMBINE
		 */
    char *buf)	/* Buffer in which the UTF-8 representation of
				 * the Tcl_UniChar is stored. Buffer must be
				 * large enough to hold the UTF-8 character
				 * (at most 4 bytes). */
		 * ch is stored. Must be large enough to hold the UTF-8
		 * character (at most 4 bytes).
		 */
{
#if TCL_UTF_MAX > 3
    int flags = ch;
#endif

    if (ch >= TCL_COMBINE) {
	ch &= (TCL_COMBINE - 1);
246
247
248
249
250
251
252




253

254
255
256
257
258
259
260
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265







+
+
+
+

+







			buf[2]  = (char) (0x80 | (0x3F & ch));
			buf[1] |= (char) (0x80 | (0x0F & (ch >> 6)));
			return 3;
		    }
		    /* Previous Tcl_UniChar was not a high surrogate, so just output */
		} else {
		    /* High surrogate */

		    /* Adding 0x40 to the high surrogate is equivalent to adding
		     * 0x10000 to the raw number encoded in the surrogate pair
		     * (0x40 << 10 == 0x10000), which yields the code point.
		     */
		    ch += 0x40;

		    /* Fill buffer with specific 3-byte (invalid) byte combination,
		       so following low surrogate can recognize it and combine */
		    buf[2] = (char) ((ch << 4) & 0x30);
		    buf[1] = (char) (0x80 | (0x3F & (ch >> 2)));
		    buf[0] = (char) (0xF0 | (0x07 & (ch >> 8)));
		    return 1;
		}