Index: src/blob.c ================================================================== --- src/blob.c +++ src/blob.c @@ -1095,35 +1095,30 @@ ** done. If useMbcs is false and there is no BOM, the input string is assumed ** to be UTF-8 already, so no conversion is done. */ void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){ char *zUtf8; - int bomSize = 0; - if( starts_with_utf8_bom(pBlob, &bomSize) ){ + int bomSize = starts_with_bom(pBlob); + if( bomSize == 3 ){ struct Blob temp; zUtf8 = blob_str(pBlob) + bomSize; blob_zero(&temp); blob_append(&temp, zUtf8, -1); blob_swap(pBlob, &temp); blob_reset(&temp); #ifdef _WIN32 - }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){ - /* Make sure the blob contains two terminating 0-bytes */ - blob_append(pBlob, "", 1); - zUtf8 = blob_str(pBlob) + bomSize; - zUtf8 = fossil_unicode_to_utf8(zUtf8); - blob_zero(pBlob); - blob_append(pBlob, zUtf8, -1); - fossil_unicode_free(zUtf8); - }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){ - unsigned int i = blob_size(pBlob); + }else if( bomSize == 2 ){ zUtf8 = blob_buffer(pBlob); - while( i > 0 ){ - /* swap bytes of unicode representation */ - char zTemp = zUtf8[--i]; - zUtf8[i] = zUtf8[i-1]; - zUtf8[--i] = zTemp; + if (*((unsigned short *)zUtf8) == 0xfffe) { + /* Found BOM, but with reversed bytes */ + unsigned int i = blob_size(pBlob); + while( i > 0 ){ + /* swap bytes of unicode representation */ + char zTemp = zUtf8[--i]; + zUtf8[i] = zUtf8[i-1]; + zUtf8[--i] = zTemp; + } } /* Make sure the blob contains two terminating 0-bytes */ blob_append(pBlob, "", 1); zUtf8 = blob_str(pBlob) + bomSize; zUtf8 = fossil_unicode_to_utf8(zUtf8); Index: src/checkin.c ================================================================== --- src/checkin.c +++ src/checkin.c @@ -899,17 +899,17 @@ int binOk, /* Non-zero if binary warnings should be disabled. */ int encodingOk, /* Non-zero if encoding warnings should be disabled. */ const char *zFilename /* The full name of the file being committed. */ ){ int eType; /* return value of looks_like_utf8/utf16() */ - int fUnicode; /* return value of starts_with_utf16_bom() */ + int fUnicode; /* 1 if blob starts with UTF-16 BOM */ char *zMsg; /* Warning message */ Blob fname; /* Relative pathname of the file */ static int allOk = 0; /* Set to true to disable this routine */ if( allOk ) return 0; - fUnicode = starts_with_utf16_bom(p, 0); + fUnicode = (starts_with_bom(p) == 2); eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); if( eType==0 || eType==-1 || fUnicode ){ const char *zWarning; const char *zDisable; const char *zConvert = "c=convert/"; Index: src/diff.c ================================================================== --- src/diff.c +++ src/diff.c @@ -340,72 +340,31 @@ if( pnByte ) *pnByte = 3; return bom; } /* -** This function returns non-zero if the blob starts with a UTF-8 -** byte-order-mark (BOM). +** This function returns detected BOM size if the blob starts with +** a UTF-8, UTF-16le or UTF-16be byte-order-mark (BOM). */ -int starts_with_utf8_bom(const Blob *pContent, int *pnByte){ +int starts_with_bom(const Blob *pContent){ const char *z = blob_buffer(pContent); - int bomSize = 0; + int c1, bomSize = 0; const unsigned char *bom = get_utf8_bom(&bomSize); - if( pnByte ) *pnByte = bomSize; - if( blob_size(pContent)=bomSize) + && (memcmp(z, bom, bomSize)==0) ){ + return bomSize; + } + /* Only accept UTF-16 BOM if the blob has an even number of bytes */ + if( (blob_size(pContent)<2) || (blob_size(pContent)&1) ) return 0; + c1 = *((unsigned short *)z); + if( (c1==0xfffe) || (c1==0xfeff) ){ + if( blob_size(pContent)>=4 ){ + /* For UTF-32 BOM, always return 0. */ + if( ((unsigned short *)z)[1] == 0 ) return 0; + } + return 2; } return 0; } /* @@ -2369,11 +2328,11 @@ if( zLimit==0 || zLimit[0]==0 ) zLimit = "-1"; iLimit = atoi(zLimit); showLog = find_option("log",0,0)!=0; fileVers = find_option("filevers",0,0)!=0; db_must_be_within_tree(); - if (g.argc<3) { + if( g.argc<3 ){ usage("FILENAME"); } file_tree_name(g.argv[2], &treename, 1); zFilename = blob_str(&treename); fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename); @@ -2383,11 +2342,11 @@ fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename); if( fid==0 ){ fossil_fatal("not part of current checkout: %s", zFilename); } cid = db_lget_int("checkout", 0); - if (cid == 0){ + if( cid == 0 ){ fossil_fatal("Not in a checkout"); } if( iLimit<=0 ) iLimit = 1000000000; compute_direct_ancestors(cid, iLimit); mid = db_int(0, "SELECT mlink.mid FROM mlink, ancestor "