Fossil

Check-in [fa50694405]
Login

Check-in [fa50694405]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Some optimizations. No change in functionality. - In clean_cmd, use new function vfile_scan2. - When checking for multiple bytes/characters always do that in order of likelihood: '\n' is more likely than '\0', which is more likely than '\r'. - Off-by-one error in looks_like_utf16(). - Allow starts_with_utf16_bom() to be used in detecting UTF-16 without BOM as well: Always determine pbReverse, even if no BOM is present. - Remove unused variable.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fa506944056737fddefcee01652f6e12e75b9a53
User & Date: jan.nijtmans 2013-05-13 09:12:07.074
Context
2013-05-13
09:21
One more off-by-one error. ... (check-in: 1484625d6e user: jan.nijtmans tags: trunk)
09:12
Some optimizations. No change in functionality. - In clean_cmd, use new function vfile_scan2. - When checking for multiple bytes/characters always do that in order of likelihood: '\n' is more likely than '\0', which is more likely than '\r'. - Off-by-one error in looks_like_utf16(). - Allow starts_with_utf16_bom() to be used in detecting UTF-16 without BOM as well: Always determine pbReverse, even if no BOM is present. - Remove unused variable. ... (check-in: fa50694405 user: jan.nijtmans tags: trunk)
2013-05-09
15:30
Add submenu elements to the web-based "annotate" display to turn features on and off. Make the default depth of an annotation 20. ... (check-in: f4bcdb62fb user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/checkin.c.
445
446
447
448
449
450
451
452
453

454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
  db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY %s)",
                filename_collation());
  n = strlen(g.zLocalRoot);
  blob_init(&path, g.zLocalRoot, n-1);
  pIgnore = glob_create(zIgnoreFlag);
  pKeep = glob_create(zKeepFlag);
  pClean = glob_create(zCleanFlag);
  vfile_scan(&path, blob_size(&path), scanFlags, pKeep);
  glob_free(pKeep);

  db_prepare(&q,
      "SELECT %Q || x FROM sfile"
      " WHERE x NOT IN (%s)"
      " ORDER BY 1",
      g.zLocalRoot, fossil_all_reserved_names(0)
  );
  if( file_tree_name(g.zRepositoryName, &repo, 0) ){
    db_multi_exec("DELETE FROM sfile WHERE x=%B", &repo);
  }
  db_multi_exec("DELETE FROM sfile WHERE x IN (SELECT pathname FROM vfile)");
  while( db_step(&q)==SQLITE_ROW ){
    const char *zName = db_column_text(&q, 0);
    if( glob_match(pIgnore, zName+n) ) continue;
    if( !allFlag && !glob_match(pClean, zName+n) ){
      Blob ans;
      char cReply;
      char *prompt = mprintf("remove unmanaged file \"%s\" (a=all/y/N)? ",
                             zName+n);
      blob_zero(&ans);
      prompt_user(prompt, &ans);
      cReply = blob_str(&ans)[0];
      if( cReply=='a' || cReply=='A' ){
        allFlag = 1;
      }else if( cReply!='y' && cReply!='Y' ){
        continue;
      }
    }
    fossil_print("removed unmanaged file \"%s\"\n", zName+n);
    if( !dryRunFlag ){
      file_delete(zName);
    }
  }
  glob_free(pClean);
  glob_free(pIgnore);
  db_finalize(&q);
}

/*
** Prompt the user for a check-in or stash comment (given in pPrompt),
** gather the response, then return the response in pComment.
**







|

>












<




















<







445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466

467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

487
488
489
490
491
492
493
  db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY %s)",
                filename_collation());
  n = strlen(g.zLocalRoot);
  blob_init(&path, g.zLocalRoot, n-1);
  pIgnore = glob_create(zIgnoreFlag);
  pKeep = glob_create(zKeepFlag);
  pClean = glob_create(zCleanFlag);
  vfile_scan2(&path, blob_size(&path), scanFlags, pIgnore, pKeep);
  glob_free(pKeep);
  glob_free(pIgnore);
  db_prepare(&q,
      "SELECT %Q || x FROM sfile"
      " WHERE x NOT IN (%s)"
      " ORDER BY 1",
      g.zLocalRoot, fossil_all_reserved_names(0)
  );
  if( file_tree_name(g.zRepositoryName, &repo, 0) ){
    db_multi_exec("DELETE FROM sfile WHERE x=%B", &repo);
  }
  db_multi_exec("DELETE FROM sfile WHERE x IN (SELECT pathname FROM vfile)");
  while( db_step(&q)==SQLITE_ROW ){
    const char *zName = db_column_text(&q, 0);

    if( !allFlag && !glob_match(pClean, zName+n) ){
      Blob ans;
      char cReply;
      char *prompt = mprintf("remove unmanaged file \"%s\" (a=all/y/N)? ",
                             zName+n);
      blob_zero(&ans);
      prompt_user(prompt, &ans);
      cReply = blob_str(&ans)[0];
      if( cReply=='a' || cReply=='A' ){
        allFlag = 1;
      }else if( cReply!='y' && cReply!='Y' ){
        continue;
      }
    }
    fossil_print("removed unmanaged file \"%s\"\n", zName+n);
    if( !dryRunFlag ){
      file_delete(zName);
    }
  }
  glob_free(pClean);

  db_finalize(&q);
}

/*
** Prompt the user for a check-in or stash comment (given in pPrompt),
** gather the response, then return the response in pComment.
**
Changes to src/diff.c.
241
242
243
244
245
246
247



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273


274
275
276
277
278
279
280
int looks_like_utf8(const Blob *pContent, int stopFlags){
  const char *z = blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;



  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;


    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<=1 || z[1]!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }







>
>
>
|







<
<



|
<
<










>
>







241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258


259
260
261
262


263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
int looks_like_utf8(const Blob *pContent, int stopFlags){
  const char *z = blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;
  j = (c!='\n');
  if( !j ){
    flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  }else if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }


  while( !(flags&stopFlags) && --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c=='\n' ){


      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<=1 || z[1]!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
355
356
357
358
359
360
361



362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393


394
395
396
397
398
399
400
401
402
403
    flags |= LOOK_ODD;  /* Odd number of bytes -> binary (UTF-8?) */
    if( n<sizeof(WCHAR_T) ) return flags;  /* One byte -> binary (UTF-8?) */
  }
  c = *z;
  if( bReverse ){
    c = UTF16_SWAP(c);
  }



  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( 1 ){
    int c2 = c;
    n -= sizeof(WCHAR_T);
    if( (flags&stopFlags) || n<sizeof(WCHAR_T) ) break;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>UTF16_LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;


    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* Not the whole blob is examined */
  }







>
>
>
|







<
<









|
<
<










>
>


|







356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373


374
375
376
377
378
379
380
381
382
383


384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
    flags |= LOOK_ODD;  /* Odd number of bytes -> binary (UTF-8?) */
    if( n<sizeof(WCHAR_T) ) return flags;  /* One byte -> binary (UTF-8?) */
  }
  c = *z;
  if( bReverse ){
    c = UTF16_SWAP(c);
  }
  j = (c!='\n');
  if( !j ){
    flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  }else if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }


  while( 1 ){
    int c2 = c;
    n -= sizeof(WCHAR_T);
    if( (flags&stopFlags) || n<sizeof(WCHAR_T) ) break;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c=='\n' ){


      flags |= LOOK_LF;
      if( c2=='\r' ){
        flags |= (LOOK_CR | LOOK_CRLF);  /* Found LF preceded by CR */
      }else{
        flags |= LOOK_LONE_LF;
      }
      if( j>UTF16_LENGTH_MASK ){
        flags |= LOOK_LONG;  /* Very long line -> binary */
      }
      j = 0;
    }else if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\r' ){
      flags |= LOOK_CR;
      if( n<2*sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
        flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
      }
    }
  }
  if( n ){
    flags |= LOOK_SHORT;  /* Not the whole blob is examined */
  }
434
435
436
437
438
439
440
441




442
443
444
445
446
447
448
449
450

451
452
453
454
455
456
457
458


459
460
461
462
463
464
465
}

/*
** This function returns non-zero if the blob starts with a UTF-16
** byte-order-mark (BOM), either in the endianness of the machine
** or in reversed byte order. The UTF-32 BOM is ruled out by checking
** if the UTF-16 BOM is not immediately followed by (utf16) 0.
** pnByte and pbReverse are only set when the function returns 1.




*/
int starts_with_utf16_bom(
  const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
  int *pnByte,          /* OUT: The number of bytes used for the BOM. */
  int *pbReverse        /* OUT: Non-zero for BOM in reverse byte-order. */
){
  const unsigned short *z = (unsigned short *)blob_buffer(pContent);
  int bomSize = sizeof(unsigned short);
  int size = blob_size(pContent);


  if( size<bomSize ) return 0;  /* No: cannot read BOM. */
  if( size>=(2*bomSize) && z[1]==0 ) return 0;  /* No: possible UTF-32. */
  if( z[0]==0xfffe ){
    if( pbReverse ) *pbReverse = 1;
  }else if( z[0]==0xfeff ){
    if( pbReverse ) *pbReverse = 0;
  }else{


    return 0; /* No: UTF-16 byte-order-mark not found. */
  }
  if( pnByte ) *pnByte = bomSize;
  return 1; /* Yes. */
}

/*







|
>
>
>
>









>

|
|
|
|
|
|

>
>







436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
}

/*
** This function returns non-zero if the blob starts with a UTF-16
** byte-order-mark (BOM), either in the endianness of the machine
** or in reversed byte order. The UTF-32 BOM is ruled out by checking
** if the UTF-16 BOM is not immediately followed by (utf16) 0.
** pnByte is only set when the function returns 1.
**
** pbReverse is always set, even when no BOM is found. Without BOM,
** it is set to 1 on little-endian and 0 on big-endian platforms. See
** clause D98 of conformance (section 3.10) of the Unicode standard.
*/
int starts_with_utf16_bom(
  const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
  int *pnByte,          /* OUT: The number of bytes used for the BOM. */
  int *pbReverse        /* OUT: Non-zero for BOM in reverse byte-order. */
){
  const unsigned short *z = (unsigned short *)blob_buffer(pContent);
  int bomSize = sizeof(unsigned short);
  int size = blob_size(pContent);
  static const int one = 1;

  if( size<bomSize ) goto noBom;  /* No: cannot read BOM. */
  if( size>=(2*bomSize) && z[1]==0 ) goto noBom;  /* No: possible UTF-32. */
  if( z[0]==0xfeff ){
    if( pbReverse ) *pbReverse = 0;
  }else if( z[0]==0xfffe ){
    if( pbReverse ) *pbReverse = 1;
  }else{
  noBom:
    if( pbReverse ) *pbReverse = *(char *) &one;
    return 0; /* No: UTF-16 byte-order-mark not found. */
  }
  if( pnByte ) *pnByte = bomSize;
  return 1; /* Yes. */
}

/*
Changes to src/main.c.
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
#else
int main(int argc, char **argv)
#endif
{
  const char *zCmdName = "unknown";
  int idx;
  int rc;
  int timerId;
  sqlite3_config(SQLITE_CONFIG_LOG, fossil_sqlite_log, 0);
  memset(&g, 0, sizeof(g));
  g.now = time(0);
#ifdef FOSSIL_ENABLE_JSON
#if defined(NDEBUG)
  g.json.errorDetailParanoia = 2 /* FIXME: make configurable
                                    One problem we have here is that this







<







511
512
513
514
515
516
517

518
519
520
521
522
523
524
#else
int main(int argc, char **argv)
#endif
{
  const char *zCmdName = "unknown";
  int idx;
  int rc;

  sqlite3_config(SQLITE_CONFIG_LOG, fossil_sqlite_log, 0);
  memset(&g, 0, sizeof(g));
  g.now = time(0);
#ifdef FOSSIL_ENABLE_JSON
#if defined(NDEBUG)
  g.json.errorDetailParanoia = 2 /* FIXME: make configurable
                                    One problem we have here is that this
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
#endif /* FOSSIL_ENABLE_JSON */
  expand_args_option(argc, argv);
#ifdef FOSSIL_ENABLE_TCL
  memset(&g.tcl, 0, sizeof(TclContext));
  g.tcl.argc = g.argc;
  g.tcl.argv = copy_args(g.argc, g.argv); /* save full arguments */
#endif

  g.mainTimerId = fossil_timer_start();
  
  if( fossil_getenv("GATEWAY_INTERFACE")!=0 && !find_option("nocgi", 0, 0)){
    zCmdName = "cgi";
    g.isHTTP = 1;
  }else if( g.argc<2 ){
    fossil_print(
       "Usage: %s COMMAND ...\n"
       "   or: %s help           -- for a list of common commands\n"







<

<







533
534
535
536
537
538
539

540

541
542
543
544
545
546
547
#endif /* FOSSIL_ENABLE_JSON */
  expand_args_option(argc, argv);
#ifdef FOSSIL_ENABLE_TCL
  memset(&g.tcl, 0, sizeof(TclContext));
  g.tcl.argc = g.argc;
  g.tcl.argv = copy_args(g.argc, g.argv); /* save full arguments */
#endif

  g.mainTimerId = fossil_timer_start();

  if( fossil_getenv("GATEWAY_INTERFACE")!=0 && !find_option("nocgi", 0, 0)){
    zCmdName = "cgi";
    g.isHTTP = 1;
  }else if( g.argc<2 ){
    fossil_print(
       "Usage: %s COMMAND ...\n"
       "   or: %s help           -- for a list of common commands\n"