Fossil

Check-in [fab09a1710]
Login

Check-in [fab09a1710]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Eliminate use of the starts_with_utf16(be|le)_bom functions; starts_with_utf16_bom should be enough. External code will typically call "starts_with_utf16_bom" first and, if it returns true, call "blob_to_utf8_no_bom" to convert the content to UTF-8. There is then no longer any reason for external code to know whether the BOM was le or be.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fab09a17105957aac5ced72bfbf65e1fc6ae0429
User & Date: jan.nijtmans 2013-02-08 09:37:10.905
Context
2013-02-12
11:53
Replaced a call to realloc() with cson_realloc() (which, in turn, uses the fossil realloc). ... (check-in: a1d2cd84b8 user: stephan tags: trunk)
10:08
timeline.rss with single-ticket support, based on David Given's patch. ... (check-in: 1bc09124bd user: stephan tags: timeline-rss-ticket)
2013-02-11
19:30
Merging from trunk. I still have merge conflicts since my previous merge. ... (check-in: bdeb633a6c user: viriketo tags: annotate_links)
2013-02-08
09:37
Eliminate use of the starts_with_utf16(be|le)_bom functions; starts_with_utf16_bom should be enough. External code will typically call "starts_with_utf16_bom" first and, if it returns true, call "blob_to_utf8_no_bom" to convert the content to UTF-8. There is then no longer any reason for external code to know whether the BOM was le or be. ... (check-in: fab09a1710 user: jan.nijtmans tags: trunk)
08:55
Addendum to previous commit: Allow the user to decide whether the "fossil knows nothing about" warning should abort the commit or not. ... (check-in: c31bbd4084 user: jan.nijtmans tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/blob.c.
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117

1118
1119
1120
1121
1122
1123
1124

1125
1126
1127
1128
1129
1130
1131
    struct Blob temp;
    zUtf8 = blob_str(pBlob) + bomSize;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);
#ifdef _WIN32
  }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_unicode_free(zUtf8);
  }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){

    unsigned int i = blob_size(pBlob);
    zUtf8 = blob_buffer(pBlob);
    while( i > 0 ){
      /* swap bytes of unicode representation */
      char zTemp = zUtf8[--i];
      zUtf8[i] = zUtf8[i-1];
      zUtf8[--i] = zTemp;

    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);







|
<
<
|
<
<
<
|
<
>
|
<
|
|
|
|
|
>







1102
1103
1104
1105
1106
1107
1108
1109


1110



1111

1112
1113

1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
    struct Blob temp;
    zUtf8 = blob_str(pBlob) + bomSize;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);
#ifdef _WIN32
  }else if( starts_with_utf16_bom(pBlob, &bomSize) ){


    zUtf8 = blob_buffer(pBlob);



    if (*((unsigned short *)zUtf8) == 0xfffe) {

      /* Found BOM, but with reversed bytes */
      unsigned int i = blob_size(pBlob);

      while( i > 0 ){
        /* swap bytes of unicode representation */
        char zTemp = zUtf8[--i];
        zUtf8[i] = zUtf8[i-1];
        zUtf8[--i] = zTemp;
      }
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
Changes to src/diff.c.
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
  const char *z = blob_buffer(pContent);
  int c1;

  if( pnByte ) *pnByte = 2;
  if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
  c1 = ((unsigned short *)z)[0];
  if( (c1==0xfeff) || (c1==0xfffe) ){
    if( blob_size(pContent) < 4 ) return 1;
    c1 = ((unsigned short *)z)[1];
    if( c1 != 0 ) return 1;
  }
  return 0;
}

/*
** This function returns non-zero if the blob starts with a UTF-16le
** byte-order-mark (BOM).
**
** If pnByte is not NULL, *pnByte is set to 2 on every call, regardless
** of the return value.
**
** The return value is 0 when the blob is shorter than 2 bytes, has an
** odd number of bytes, does not begin with the 16-bit value 0xfeff, or
** is at least 4 bytes long and its second 16-bit unit is zero.
**
** The 16-bit units are read in the host's native byte order, so the
** "le" in the name is accurate only on a little-endian host.
*/
int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
  const char *z = blob_buffer(pContent);
  int c1;

  if( pnByte ) *pnByte = 2;  /* Reported even when no BOM is found */
  /* Too short to hold a BOM, or not a whole number of 16-bit units */
  if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
  /* First 16-bit unit, native byte order.
  ** NOTE(review): the cast assumes z is suitably aligned for
  ** unsigned short -- confirm against the Blob allocator. */
  c1 = ((unsigned short *)z)[0];
  if( c1==0xfeff ){
    /* Size is even and >= 2 here, so < 4 means the blob is the BOM alone */
    if( blob_size(pContent) < 4 ) return 1;
    c1 = ((unsigned short *)z)[1];
    /* NOTE(review): a zero second unit presumably means this is really a
    ** UTF-32 BOM (FF FE 00 00) rather than UTF-16 -- confirm. */
    if( c1 != 0 ) return 1;
  }
  return 0;
}

/*
** This function returns non-zero if the blob starts with a UTF-16be
** byte-order-mark (BOM).
**
** If pnByte is not NULL, *pnByte is set to 2 on every call, regardless
** of the return value.
**
** The return value is 0 when the blob is shorter than 2 bytes, has an
** odd number of bytes, does not begin with the 16-bit value 0xfffe, or
** is at least 4 bytes long and its second 16-bit unit is zero.
**
** The 16-bit units are read in the host's native byte order, so the
** value 0xfffe corresponds to a byte-swapped (big-endian) BOM only on
** a little-endian host.
*/
int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
  const char *z = blob_buffer(pContent);
  int c1;

  if( pnByte ) *pnByte = 2;  /* Reported even when no BOM is found */
  /* Too short to hold a BOM, or not a whole number of 16-bit units */
  if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
  /* First 16-bit unit, native byte order.
  ** NOTE(review): the cast assumes z is suitably aligned for
  ** unsigned short -- confirm against the Blob allocator. */
  c1 = ((unsigned short *)z)[0];
  if( c1==0xfffe ){
    /* Size is even and >= 2 here, so < 4 means the blob is the BOM alone */
    if( blob_size(pContent) < 4 ) return 1;
    c1 = ((unsigned short *)z)[1];
    /* NOTE(review): the reason for rejecting a zero second unit is not
    ** evident from this function alone -- confirm the intent. */
    if( c1 != 0 ) return 1;
  }
  return 0;
}








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







363
364
365
366
367
368
369






































370
371
372
373
374
375
376
  const char *z = blob_buffer(pContent);
  int c1;

  if( pnByte ) *pnByte = 2;
  if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
  c1 = ((unsigned short *)z)[0];
  if( (c1==0xfeff) || (c1==0xfffe) ){






































    if( blob_size(pContent) < 4 ) return 1;
    c1 = ((unsigned short *)z)[1];
    if( c1 != 0 ) return 1;
  }
  return 0;
}