Changes On Branch simplify-starts-with
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch simplify-starts-with Excluding Merge-Ins

This is equivalent to a diff from 43c4522623 to c209105f0f

2013-02-07
15:28
Divide blob length check (even number of bytes) and UTF-32 check in the 3 versions of the UTF-16 BOM functions. check-in: be6756e26b user: jan.nijtmans tags: trunk
09:39
merge trunk check-in: 8994f3680a user: jan.nijtmans tags: improve_commit_warning
09:19
If a file starts with a UTF-32 BOM, always consider it binary, without warning. Closed-Leaf check-in: c209105f0f user: jan.nijtmans tags: simplify-starts-with
08:47
Combine the 4 "starts_with_utf??_bom" functions into a single - easier to use - function "starts_with_bom". In addition, it only checks for a UTF-16 BOM if the blob has an even number of bytes. check-in: 6c417d8bf5 user: jan.nijtmans tags: simplify-starts-with
02:08
Add the test-ssh-far-side command that can be used in place of a shell for the remote side of an ssh: sync. check-in: 43c4522623 user: drh tags: trunk
00:24
Add the shell= query parameter to the ssh: scheme for cloning and syncing. check-in: 2163cd9666 user: drh tags: trunk

Changes to src/blob.c.

1093
1094
1095
1096
1097
1098
1099
1100
1101


1102
1103
1104
1105
1106
1107
1108
1109

1110
1111
1112

1113
1114
1115
1116

1117
1118


1119
1120
1121
1122
1123
1124






1125
1126
1127
1128
1129
1130
1131
1093
1094
1095
1096
1097
1098
1099


1100
1101
1102
1103
1104
1105
1106
1107
1108

1109



1110




1111


1112
1113






1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126







-
-
+
+







-
+
-
-
-
+
-
-
-
-
+
-
-
+
+
-
-
-
-
-
-
+
+
+
+
+
+







** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
** done.  If useMbcs is false and there is no BOM, the input string is assumed
** to be UTF-8 already, so no conversion is done.
*/
void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
  char *zUtf8;
  int bomSize = 0;
  if( starts_with_utf8_bom(pBlob, &bomSize) ){
  int bomSize = starts_with_bom(pBlob);
  if( bomSize == 3 ){
    struct Blob temp;
    zUtf8 = blob_str(pBlob) + bomSize;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);
#ifdef _WIN32
  }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
  }else if( bomSize == 2 ){
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = blob_buffer(pBlob);
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_unicode_free(zUtf8);
    if (*((unsigned short *)zUtf8) == 0xfffe) {
  }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
    unsigned int i = blob_size(pBlob);
      /* Found BOM, but with reversed bytes */
      unsigned int i = blob_size(pBlob);
    zUtf8 = blob_buffer(pBlob);
    while( i > 0 ){
      /* swap bytes of unicode representation */
      char zTemp = zUtf8[--i];
      zUtf8[i] = zUtf8[i-1];
      zUtf8[--i] = zTemp;
      while( i > 0 ){
        /* swap bytes of unicode representation */
        char zTemp = zUtf8[--i];
        zUtf8[i] = zUtf8[i-1];
        zUtf8[--i] = zTemp;
      }
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    zUtf8 = blob_str(pBlob) + bomSize;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);

Changes to src/checkin.c.

897
898
899
900
901
902
903
904

905
906
907
908
909
910

911
912
913
914
915
916
917
897
898
899
900
901
902
903

904
905
906
907
908
909

910
911
912
913
914
915
916
917







-
+





-
+







  Blob *p,              /* The content of the file being committed. */
  int crnlOk,           /* Non-zero if CR/NL warnings should be disabled. */
  int binOk,            /* Non-zero if binary warnings should be disabled. */
  int encodingOk,        /* Non-zero if encoding warnings should be disabled. */
  const char *zFilename /* The full name of the file being committed. */
){
  int eType;              /* return value of looks_like_utf8/utf16() */
  int fUnicode;           /* return value of starts_with_utf16_bom() */
  int fUnicode;           /* 1 if  blob starts with UTF-16 BOM */
  char *zMsg;             /* Warning message */
  Blob fname;             /* Relative pathname of the file */
  static int allOk = 0;   /* Set to true to disable this routine */

  if( allOk ) return 0;
  fUnicode = starts_with_utf16_bom(p, 0);
  fUnicode = (starts_with_bom(p) == 2);
  eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
  if( eType==0 || eType==-1 || fUnicode ){
    const char *zWarning;
    const char *zDisable;
    const char *zConvert = "c=convert/";
    Blob ans;
    char cReply;

Changes to src/diff.c.

338
339
340
341
342
343
344
345
346


347
348

349
350

351
352
353
354
355


356
357
358
359
360
361
362
363
364
365
366
367
368
369
370

371
372
373
374
375
376

377
378
379
380
381
382

383
384
385
386



387
388
389
390
391

392
393

394
395
396
397
398
399
400
401

402
403
404
405
406

407
408
409
410
411
412
413
338
339
340
341
342
343
344


345
346
347

348
349

350
351
352



353
354















355


356



357






358




359
360
361





362


363








364





365
366
367
368
369
370
371
372







-
-
+
+

-
+

-
+


-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-

-
-
-
+
-
-
-
-
-
-
+
-
-
-
-
+
+
+
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
+







    0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
  };
  if( pnByte ) *pnByte = 3;
  return bom;
}

/*
** This function returns non-zero if the blob starts with a UTF-8
** byte-order-mark (BOM).
** This function returns the detected BOM size if the blob starts with
** a UTF-8, UTF-16le or UTF-16be byte-order-mark (BOM).
*/
int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
int starts_with_bom(const Blob *pContent){
  const char *z = blob_buffer(pContent);
  int bomSize = 0;
  int c1, bomSize = 0;
  const unsigned char *bom = get_utf8_bom(&bomSize);

  if( pnByte ) *pnByte = bomSize;
  if( blob_size(pContent)<bomSize ) return 0;
  return memcmp(z, bom, bomSize)==0;
  if( (blob_size(pContent)>=bomSize)
      && (memcmp(z, bom, bomSize)==0) ){
}

/*
** This function returns non-zero if the blob starts with a UTF-16le or
** UTF-16be byte-order-mark (BOM).
*/
int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
  const char *z = blob_buffer(pContent);
  int c1, c2;

  if( pnByte ) *pnByte = 2;
  if( blob_size(pContent)<2 ) return 0;
  c1 = z[0]; c2 = z[1];
  if( (c1==(char)0xff) && (c2==(char)0xfe) ){
    return 1;
    return bomSize;
  }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
    return 1;
  }
  return 0;
}

  /* Only accept UTF-16 BOM if the blob has an even number of bytes */
/*
** This function returns non-zero if the blob starts with a UTF-16le
** byte-order-mark (BOM).
*/
int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
  const char *z = blob_buffer(pContent);
  if( (blob_size(pContent)<2) || (blob_size(pContent)&1) ) return 0;
  int c1, c2;

  if( pnByte ) *pnByte = 2;
  if( blob_size(pContent)<2 ) return 0;
  c1 = *((unsigned short *)z);
  if( (c1==0xfffe) || (c1==0xfeff) ){
    if( blob_size(pContent)>=4 ){
  c1 = z[0]; c2 = z[1];
  if( (c1==(char)0xff) && (c2==(char)0xfe) ){
    return 1;
  }
  return 0;
      /* For UTF-32 BOM, always return 0. */
}

      if( ((unsigned short *)z)[1] == 0 ) return 0;
/*
** This function returns non-zero if the blob starts with a UTF-16be
** byte-order-mark (BOM).
*/
int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
  const char *z = blob_buffer(pContent);
  int c1, c2;

    }
  if( pnByte ) *pnByte = 2;
  if( blob_size(pContent)<2 ) return 0;
  c1 = z[0]; c2 = z[1];
  if( (c1==(char)0xfe) && (c2==(char)0xff) ){
    return 1;
    return 2;
  }
  return 0;
}

/*
** Return true if two DLine elements are identical.
*/
2367
2368
2369
2370
2371
2372
2373
2374

2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388

2389
2390
2391
2392
2393
2394
2395
2326
2327
2328
2329
2330
2331
2332

2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346

2347
2348
2349
2350
2351
2352
2353
2354







-
+













-
+








  zLimit = find_option("limit",0,1);
  if( zLimit==0 || zLimit[0]==0 ) zLimit = "-1";
  iLimit = atoi(zLimit);
  showLog = find_option("log",0,0)!=0;
  fileVers = find_option("filevers",0,0)!=0;
  db_must_be_within_tree();
  if (g.argc<3) {
  if( g.argc<3 ){
    usage("FILENAME");
  }
  file_tree_name(g.argv[2], &treename, 1);
  zFilename = blob_str(&treename);
  fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename);
  if( fnid==0 ){
    fossil_fatal("no such file: %s", zFilename);
  }
  fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename);
  if( fid==0 ){
    fossil_fatal("not part of current checkout: %s", zFilename);
  }
  cid = db_lget_int("checkout", 0);
  if (cid == 0){
  if( cid == 0 ){
    fossil_fatal("Not in a checkout");
  }
  if( iLimit<=0 ) iLimit = 1000000000;
  compute_direct_ancestors(cid, iLimit);
  mid = db_int(0, "SELECT mlink.mid FROM mlink, ancestor "
          " WHERE mlink.fid=%d AND mlink.fnid=%d AND mlink.mid=ancestor.rid"
          " ORDER BY ancestor.generation ASC LIMIT 1",