Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch bomRefactor Excluding Merge-Ins
This is equivalent to a diff from a4cdc7235a to 3f2f1e62fa
2013-03-19
| ||
17:40 | Merge UTF-16 byte swapping fix and test-looks-like-utf command enhancements. check-in: b4bec3753d user: mistachkin tags: trunk | |
17:37 | Fix overly eager byte swapping when checking for UTF-16 text. Closed-Leaf check-in: 3f2f1e62fa user: mistachkin tags: bomRefactor | |
08:59 | Merge "cr-warning" branch to trunk: Fossil now warns before committing files with CR line-endings and offers to convert them to LF line-endings; fossil's diff cannot handle those. In checkin.c, use LOOK_BINARY instead of LOOK_NUL, in case more flags are added to the BINARY detection. Rename LOOK_LENGTH to LOOK_LONG. check-in: ea2598e447 user: jan.nijtmans tags: trunk | |
08:34 | Fix expected value of test-cases: The value of LOOK_LONE_CR is wrong in 19 cases. I leave it to Joe to fix the code. check-in: 8af1541ac5 user: jan.nijtmans tags: bomRefactor | |
2013-03-18
| ||
23:47 | Make sure that LOOK_CR is set even when a CR/LF pair is detected. Rename the LOOK_LENGTH flag to LOOK_LONG for clarity. Add LOOK_SHORT flag to indicate that the looks_like_utf16() function did not perform a full check. Support tests for UTF-16 in reverse byte order. Enhancements to the test-looks-like-utf command. check-in: b0b3f2a44f user: mistachkin tags: bomRefactor | |
12:37 | Adapt test-case 112 such that it contains a reversed CR/LF, a case not covered before. Fix detection of reversed CR/LF and lone CR in reversed UTF-16 case, broken by [e3f9a42b58]. check-in: a4cdc7235a user: jan.nijtmans tags: trunk | |
11:45 | Add test-cases using reverse UTF-16 BOM, and the Unicode characters U+0A00 and U+0D00. check-in: d1f0c4b93e user: jan.nijtmans tags: trunk | |
Changes to src/checkin.c.
︙ | ︙ | |||
908 909 910 911 912 913 914 | const char *zFilename /* The full name of the file being committed. */ ){ int bReverse; /* UTF-16 byte order is reversed? */ int fUnicode; /* return value of starts_with_utf16_bom() */ int lookFlags; /* output flags from looks_like_utf8/utf16() */ int fHasNul; /* the blob contains one or more NUL chars */ int fHasCrLf; /* the blob contains one or more CR/LF pairs */ | | | | | | | 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 | const char *zFilename /* The full name of the file being committed. */ ){ int bReverse; /* UTF-16 byte order is reversed? */ int fUnicode; /* return value of starts_with_utf16_bom() */ int lookFlags; /* output flags from looks_like_utf8/utf16() */ int fHasNul; /* the blob contains one or more NUL chars */ int fHasCrLf; /* the blob contains one or more CR/LF pairs */ int fHasLong; /* the blob contains an overly long line */ char *zMsg; /* Warning message */ Blob fname; /* Relative pathname of the file */ static int allOk = 0; /* Set to true to disable this routine */ if( allOk ) return 0; fUnicode = could_be_utf16(p, &bReverse); if( fUnicode ){ lookFlags = looks_like_utf16(p, bReverse); }else{ lookFlags = looks_like_utf8(p); } fHasNul = (lookFlags & LOOK_NUL); fHasCrLf = (lookFlags & LOOK_CRLF); fHasLong = (lookFlags & LOOK_LONG); if( fHasNul || fHasLong || fHasCrLf || fUnicode ){ const char *zWarning; const char *zDisable; const char *zConvert = "c=convert/"; Blob ans; char cReply; if( fHasNul || fHasLong ){ if( binOk ){ return 0; /* We don't want binary warnings for this file. */ } if( !fHasNul && fHasLong ){ zWarning = "long lines"; }else{ zWarning = "binary data"; } zDisable = "\"binary-glob\" setting"; zConvert = ""; /* We cannot convert binary files. */ }else if( fHasCrLf && fUnicode ){ |
︙ | ︙ |
Changes to src/diff.c.
︙ | ︙ | |||
72 73 74 75 76 77 78 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */ #define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */ #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ #define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */ #define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */ #define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */ | | > | | 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */ #define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */ #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ #define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */ #define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */ #define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */ #define LOOK_LONG ((int)0x00000040) /* An over length line was found. */ #define LOOK_ODD ((int)0x00000080) /* An odd number of bytes was found. */ #define LOOK_SHORT ((int)0x00000100) /* Unable to perform full check. */ #define LOOK_BINARY (LOOK_NUL | LOOK_LONG | LOOK_SHORT) /* May be binary. */ #endif /* INTERFACE */ /* ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) */ #define LENGTH_MASK_SZ 13 #define LENGTH_MASK ((1<<LENGTH_MASK_SZ)-1) |
︙ | ︙ | |||
255 256 257 258 259 260 261 | int c2 = c; c = *++z; ++j; if( c==0 ){ flags |= LOOK_NUL; /* NUL character in a file -> binary */ }else if( c=='\n' ){ flags |= LOOK_LF; if( c2=='\r' ){ | | | | | | | > | 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | int c2 = c; c = *++z; ++j; if( c==0 ){ flags |= LOOK_NUL; /* NUL character in a file -> binary */ }else if( c=='\n' ){ flags |= LOOK_LF; if( c2=='\r' ){ flags |= (LOOK_CR | LOOK_CRLF); /* Found LF preceded by CR */ }else{ flags |= LOOK_LONE_LF; } if( j>LENGTH_MASK ){ flags |= LOOK_LONG; /* Very long line -> binary */ } j = 0; }else if( c=='\r' ){ flags |= LOOK_CR; if( n<=1 || z[1]!='\n' ){ flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ } } } if( j>LENGTH_MASK ){ flags |= LOOK_LONG; /* Very long line -> binary */ } return flags; } /* ** Define the type needed to represent a Unicode (UTF-16) character. */ #ifndef WCHAR_T # ifdef _WIN32 # define WCHAR_T wchar_t # else # define WCHAR_T unsigned short # endif #endif /* ** Maximum length of a line in a text file, in UTF-16 characters. (4096) ** The number of bytes represented by this value cannot exceed LENGTH_MASK ** bytes, because that is the line buffer size used by the diff engine. */ #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) /* ** This macro is used to swap the byte order of a UTF-16 character in the ** looks_like_utf16() function. */ #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) #define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch)) /* ** This function attempts to scan each logical line within the blob to ** determine the type of content it appears to contain. 
The return value ** is a combination of one or more of the LOOK_XXX flags (see above): ** ** !LOOK_BINARY -- The content appears to consist entirely of text; however, |
︙ | ︙ | |||
344 345 346 347 348 349 350 351 352 | if( n==0 ) return flags; /* Empty file -> text */ if( n%sizeof(WCHAR_T) ){ flags |= LOOK_ODD; /* Odd number of bytes -> binary (UTF-8?) */ if( n<sizeof(WCHAR_T) ) return flags; /* One byte -> binary (UTF-8?) */ } c = *z; if( c==0 ){ flags |= LOOK_NUL; /* NUL character in a file -> binary */ | > > > | > | > > > > > < < < | > | | | | | > > | | < < | | 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 | if( n==0 ) return flags; /* Empty file -> text */ if( n%sizeof(WCHAR_T) ){ flags |= LOOK_ODD; /* Odd number of bytes -> binary (UTF-8?) */ if( n<sizeof(WCHAR_T) ) return flags; /* One byte -> binary (UTF-8?) */ } c = *z; if( bReverse ){ c = UTF16_SWAP(c); } if( c==0 ){ flags |= LOOK_NUL; /* NUL character in a file -> binary */ }else if( c=='\r' ){ flags |= LOOK_CR; if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ } } j = (c!='\n'); if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ while( 1 ){ int c2 = c; n -= sizeof(WCHAR_T); if( n<sizeof(WCHAR_T) ) break; c = *++z; if( bReverse ){ c = UTF16_SWAP(c); } ++j; if( c==0 ){ flags |= LOOK_NUL; /* NUL character in a file -> binary */ }else if( c=='\n' ){ flags |= LOOK_LF; if( c2=='\r' ){ flags |= (LOOK_CR | LOOK_CRLF); /* Found LF preceded by CR */ }else{ flags |= LOOK_LONE_LF; } if( j>UTF16_LENGTH_MASK ){ flags |= LOOK_LONG; /* Very long line -> binary */ } j = 0; }else if( c=='\r' ){ flags |= LOOK_CR; if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ } } } if( j>UTF16_LENGTH_MASK ){ flags |= LOOK_LONG; /* Very long line -> binary */ } return flags; } /* ** This function returns an array of bytes representing the byte-order-mark ** for 
UTF-8. |
︙ | ︙ | |||
2521 2522 2523 2524 2525 2526 2527 | */ void looks_like_utf_test_cmd(void){ Blob blob; /* the contents of the specified file */ int fUtf8; /* return value of starts_with_utf8_bom() */ int fUtf16; /* return value of starts_with_utf16_bom() */ int fUnicode; /* return value of could_be_utf16() */ int lookFlags; /* output flags from looks_like_utf8/utf16() */ | | > | | | | | | | > | 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 | */ void looks_like_utf_test_cmd(void){ Blob blob; /* the contents of the specified file */ int fUtf8; /* return value of starts_with_utf8_bom() */ int fUtf16; /* return value of starts_with_utf16_bom() */ int fUnicode; /* return value of could_be_utf16() */ int lookFlags; /* output flags from looks_like_utf8/utf16() */ int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */ int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */ if( g.argc!=3 ) usage("FILENAME"); blob_read_from_file(&blob, g.argv[2]); fUtf8 = starts_with_utf8_bom(&blob, 0); fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16); fUnicode = could_be_utf16(&blob, &bRevUnicode); lookFlags = fUnicode ? 
looks_like_utf16(&blob, bRevUnicode) : looks_like_utf8(&blob); fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); fossil_print("Starts with UTF-16 BOM: %s\n", fUtf16?(bRevUtf16?"reversed":"yes"):"no"); fossil_print("Looks like UTF-%s: %s\n",fUnicode?"16":"8", (lookFlags&LOOK_BINARY)?"no":"yes"); fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no"); fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no"); fossil_print("Has flag LOOK_LONE_CR: %s\n", (lookFlags&LOOK_LONE_CR)?"yes":"no"); fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no"); fossil_print("Has flag LOOK_LONE_LF: %s\n", (lookFlags&LOOK_LONE_LF)?"yes":"no"); fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no"); fossil_print("Has flag LOOK_LONG: %s\n",(lookFlags&LOOK_LONG)?"yes":"no"); fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no"); fossil_print("Has flag LOOK_SHORT: %s\n",(lookFlags&LOOK_SHORT)?"yes":"no"); blob_reset(&blob); } |
Changes to test/utf.test.
more than 10,000 changes