Fossil

Check-in [f575af97b2]
Login

Check-in [f575af97b2]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Reformat the windows command-line parser to follow the Fossil style. Use the alternative command-line parser on all windows builds, not just for MinGW builds, to simplify the logic and so that the alternative parser code is tested more heavily.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | mingw-broken-cmdline
Files: files | file ages | folders
SHA1: f575af97b2221e9e822cab27b8e153c70bcd01f8
User & Date: drh 2012-09-09 22:06:21.882
Context
2012-09-10
08:21
Add some test cases ... (Closed-Leaf check-in: d43165418c user: jan.nijtmans tags: mingw-broken-cmdline)
2012-09-09
22:06
Reformat the windows command-line parser to follow the Fossil style. Use the alternative command-line parser on all windows builds, not just for MinGW builds, to simplify the logic and so that the alternative parser code is tested more heavily. ... (check-in: f575af97b2 user: drh tags: mingw-broken-cmdline)
20:53
Fix ticket [906c533302]. If you want to replace the mingw command-line parser with a better one (conforming to ms rules), compile with -DMINGW_BROKEN_MAINARGS. MinGW doesn't support unicode command line parsing (linker option -municode), so the option -DMINGW_BROKEN_MAINARGS can be used to fix that too. ... (check-in: 047dd62604 user: jan.nijtmans tags: mingw-broken-cmdline)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/main.c.
328
329
330
331
332
333
334

335
336







337
338


339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
380
381
382
383

384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
#endif
  free(g.zErrMsg);
  if(g.db){
    db_close(0);
  }
}


/*
 *-------------------------------------------------------------------------







 *
 * setargv --


 *
 *	Parse the Windows command line string into argc/argv. Done here
 *	because we don't trust the builtin argument parser in crt0. Windows
 *	applications are responsible for breaking their command line into
 *	arguments.
 *
 *	2N backslashes + quote -> N backslashes + begin quoted string
 *	2N + 1 backslashes + quote -> literal
 *	N backslashes + non-quote -> literal
 *	quote + quote in a quoted string -> single quote
 *	quote + quote not in quoted string -> empty string
 *	quote -> begin quoted string
 *
 * Results:
 *	Fills argcPtr with the number of arguments and argvPtr with the array
 *	of arguments.
 *
 * Side effects:
 *	Memory allocated.
 *
 *--------------------------------------------------------------------------
 */

#ifdef MINGW_BROKEN_MAINARGS
#include <tchar.h>

static void
setargv(
    int *argcPtr,		/* Filled with number of argument strings. */
    void *argvPtr)		/* Filled with argument strings (malloc'd). */
{
    TCHAR *rawLine;		/* Command line as returned by GetCommandLine(). */
    TCHAR *src;			/* Read cursor into rawLine. */
    TCHAR *dst;			/* Write cursor for the argument being built. */
    TCHAR *pool;		/* Next unused character slot in the allocation. */
    TCHAR **args;		/* Pointer table at the front of the allocation. */
    int count;			/* Number of arguments produced so far. */
    int maxArgs;		/* Upper bound on the number of table slots. */
    int quoted;			/* Non-zero while inside a double-quoted span. */
    int keep;			/* Non-zero if the current character is copied. */
    int nBackslash;		/* Pending backslashes not yet written out. */

    rawLine = GetCommandLine();

    /*
     * Over-estimate the argument count: start at 2 and add one for every
     * run of blanks/tabs seen in the raw command line.
     */

    maxArgs = 2;
    src = rawLine;
    while (*src != TEXT('\0')) {
        if ((*src == TEXT(' ')) || (*src == TEXT('\t'))) {
            maxArgs++;
            do {
                src++;
            } while ((*src == TEXT(' ')) || (*src == TEXT('\t')));
            if (*src == TEXT('\0')) {
                break;
            }
        }
        src++;
    }

    /*
     * A single allocation holds the pointer table followed by the
     * character storage for every argument string.
     */

    pool = fossil_malloc(maxArgs * sizeof(char *)
            + (_tcslen(rawLine) * sizeof(TCHAR)) + sizeof(TCHAR));
    args = (TCHAR **) pool;
    pool += maxArgs * (sizeof(char *)/sizeof(TCHAR));
    maxArgs--;			/* Keep one slot for the terminating NULL. */

    src = rawLine;
    count = 0;
    while (count < maxArgs) {
        dst = pool;
        args[count] = dst;

        /* Skip the separator run in front of the next argument. */
        while ((*src == TEXT(' ')) || (*src == TEXT('\t'))) {
            src++;
        }
        if (*src == TEXT('\0')) {
            break;
        }

        quoted = 0;
        nBackslash = 0;
        for (;;) {
            keep = 1;

            /* Gather a run of backslashes; their meaning depends on
             * whether a double quote follows. */
            for (; *src == TEXT('\\'); src++) {
                nBackslash++;
            }

            if (*src == TEXT('"')) {
                if ((nBackslash % 2) == 0) {
                    if (quoted && (src[1] == TEXT('"'))) {
                        /* "" inside quotes: skip one, copy the other. */
                        src++;
                    } else {
                        /* Unescaped quote: toggle quoting, drop the quote. */
                        quoted = !quoted;
                        keep = 0;
                    }
                }
                /* In front of a quote, each backslash pair yields one. */
                nBackslash /= 2;
            }

            /* Flush the pending backslashes before the current character. */
            for (; nBackslash > 0; nBackslash--) {
                *dst++ = TEXT('\\');
            }

            if (*src == TEXT('\0')) {
                break;
            }
            if (!quoted && ((*src == TEXT(' ')) || (*src == TEXT('\t')))) {
                break;
            }
            if (keep) {
                *dst++ = *src;
            }
            src++;
        }
        *dst = '\0';
        pool = dst + 1;
        count++;
    }
    args[count] = NULL;

    *argcPtr = count;
    *((TCHAR ***)argvPtr) = args;
}
#endif /* MINGW_BROKEN_MAINARGS */


/*
** Convert all arguments from mbcs (or unicode) to UTF-8. Then
** search g.argv for arguments "--args FILENAME". If found, then
** (1) remove the two arguments from g.argv
** (2) Read the file FILENAME







>

<
>
>
>
>
>
>
>
|
<
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
<
|
<
<

|
|
<
|
|
<
>
|
|
|

|

|
|
|
|
<
|
|
<
>
|
|
|
|
|
|
|
|
|

|
|
|
|
|

|
|
|
|
|
|
|
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
|
|
|
|
<
|
<
|
|
|
|
|
|
|
|
|
|
|
|
<
|
|

|







328
329
330
331
332
333
334
335
336

337
338
339
340
341
342
343
344

345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362





363


364
365
366

367
368

369
370
371
372
373
374
375
376
377
378
379

380
381

382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427

428
429
430
431
432

433

434
435
436
437
438
439
440
441
442
443
444
445

446
447
448
449
450
451
452
453
454
455
456
#endif
  free(g.zErrMsg);
  if(g.db){
    db_close(0);
  }
}

#if defined(_WIN32)
/*

** Parse the command-line arguments passed to windows.  We do this
** ourselves to work around bugs in the command-line parsing of MinGW.
** It is possible (in theory) to only use this routine when compiling
** with MinGW and to use built-in command-line parsing for MSVC and
** MinGW-64.  However, the code is here, it is efficient, and works, and
** by using it in all cases we do a better job of testing it.  If you suspect
** a bug in this code, test your theory by invoking "fossil test-echo".
**

** This routine is copied from TCL with some reformatting.
** The original comment text follows:
**
** Parse the Windows command line string into argc/argv. Done here
** because we don't trust the builtin argument parser in crt0. Windows
** applications are responsible for breaking their command line into
** arguments.
**
** 2N backslashes + quote -> N backslashes + begin quoted string
** 2N + 1 backslashes + quote -> literal
** N backslashes + non-quote -> literal
** quote + quote in a quoted string -> single quote
** quote + quote not in quoted string -> empty string
** quote -> begin quoted string
**
** Results:
** Fills argcPtr with the number of arguments and argvPtr with the array
** of arguments.





*/


#include <tchar.h>
#define tchar_isspace(X)  ((X)==TEXT(' ') || (X)==TEXT('\t'))
static void parse_windows_command_line(
  int *argcPtr,   /* OUT: number of argument strings */
  void *argvPtr   /* OUT: really a TCHAR***; receives the malloc'd argv */
){
  TCHAR *zCmd;      /* Command line as returned by GetCommandLine() */
  TCHAR *zIn;       /* Read cursor into zCmd */
  TCHAR *zOut;      /* Write cursor for the argument being assembled */
  TCHAR *zPool;     /* Next unused character slot in the allocation */
  TCHAR **azArg;    /* Pointer table at the front of the allocation */
  int nArg;         /* Number of arguments produced so far */
  int mxArg;        /* Upper bound on the number of table slots */
  int inQuote;      /* True while inside a double-quoted span */
  int emit;         /* True if the current character is to be copied */
  int nSlash;       /* Pending backslashes not yet written out */

  zCmd = GetCommandLine();

  /*
  ** Over-estimate the argument count: begin at 2 and add one for every
  ** run of whitespace found in the raw command line.
  */
  mxArg = 2;
  zIn = zCmd;
  while( *zIn!=TEXT('\0') ){
    if( tchar_isspace(*zIn) ){
      mxArg++;
      do{
        zIn++;
      }while( tchar_isspace(*zIn) );
      if( *zIn==TEXT('\0') ) break;
    }
    zIn++;
  }

  /*
  ** One allocation holds the pointer table followed by the character
  ** storage for all of the argument strings.
  */
  zPool = fossil_malloc(mxArg * sizeof(char*)
    + (_tcslen(zCmd) * sizeof(TCHAR)) + sizeof(TCHAR));
  azArg = (TCHAR**)zPool;
  zPool += mxArg*(sizeof(char*)/sizeof(TCHAR));
  mxArg--;   /* Keep one slot for the terminating NULL */

  zIn = zCmd;
  nArg = 0;
  while( nArg<mxArg ){
    zOut = zPool;
    azArg[nArg] = zOut;

    /* Skip the separator run in front of the next argument */
    while( tchar_isspace(*zIn) ){
      zIn++;
    }
    if( *zIn==TEXT('\0') ) break;

    inQuote = 0;
    nSlash = 0;
    for(;;){
      emit = 1;

      /* Gather a run of backslashes.  Their meaning depends on whether
      ** or not a double-quote follows. */
      for(; *zIn==TEXT('\\'); zIn++){
        nSlash++;
      }

      if( *zIn==TEXT('"') ){
        if( (nSlash%2)==0 ){
          if( inQuote && zIn[1]==TEXT('"') ){
            /* "" inside quotes: skip one, copy the other */
            zIn++;
          }else{
            /* Unescaped quote: toggle quoting and drop the quote itself */
            inQuote = !inQuote;
            emit = 0;
          }
        }
        /* In front of a quote, each pair of backslashes yields one */
        nSlash /= 2;
      }

      /* Flush the pending backslashes ahead of the current character */
      for(; nSlash>0; nSlash--){
        *zOut++ = TEXT('\\');
      }

      if( *zIn==TEXT('\0') ) break;
      if( !inQuote && tchar_isspace(*zIn) ) break;
      if( emit ){
        *zOut++ = *zIn;
      }
      zIn++;
    }
    *zOut = '\0';
    zPool = zOut + 1;
    nArg++;
  }
  azArg[nArg] = NULL;

  *argcPtr = nArg;
  *((TCHAR ***)argvPtr) = azArg;
}
#endif /* defined(_WIN32) */


/*
** Convert all arguments from mbcs (or unicode) to UTF-8. Then
** search g.argv for arguments "--args FILENAME". If found, then
** (1) remove the two arguments from g.argv
** (2) Read the file FILENAME
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
#ifdef _WIN32
  wchar_t buf[MAX_PATH];
#endif

  g.argc = argc;
  g.argv = argv;
#ifdef _WIN32
#ifdef MINGW_BROKEN_MAINARGS
  setargv(&g.argc, &g.argv);
#endif
  GetModuleFileNameW(NULL, buf, MAX_PATH);
  g.argv[0] = fossil_unicode_to_utf8(buf);
#ifdef UNICODE
  for(i=1; i<g.argc; i++) g.argv[i] = fossil_unicode_to_utf8(g.argv[i]);
#else
  for(i=1; i<g.argc; i++) g.argv[i] = fossil_mbcs_to_utf8(g.argv[i]);
#endif







<
|
<







474
475
476
477
478
479
480

481

482
483
484
485
486
487
488
#ifdef _WIN32
  wchar_t buf[MAX_PATH];
#endif

  g.argc = argc;
  g.argv = argv;
#ifdef _WIN32

  parse_windows_command_line(&g.argc, &g.argv);

  GetModuleFileNameW(NULL, buf, MAX_PATH);
  g.argv[0] = fossil_unicode_to_utf8(buf);
#ifdef UNICODE
  for(i=1; i<g.argc; i++) g.argv[i] = fossil_unicode_to_utf8(g.argv[i]);
#else
  for(i=1; i<g.argc; i++) g.argv[i] = fossil_mbcs_to_utf8(g.argv[i]);
#endif
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
    if( n<=1 ) continue;
    z = blob_buffer(&line);
    z[n-1] = 0;
    if (foundBom == -1) {
      static const char bom[] = { 0xEF, 0xBB, 0xBF };
      foundBom = memcmp(z, bom, 3)==0;
      if( foundBom ) {
    	  z += 3; n -= 3;
      }
    }
    if((n>1) && ('\r'==z[n-2])){
      if(n==2) continue /*empty line*/;
      z[n-2] = 0;
    }
    if (!foundBom) {







|







520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
    if( n<=1 ) continue;
    z = blob_buffer(&line);
    z[n-1] = 0;
    if (foundBom == -1) {
      static const char bom[] = { 0xEF, 0xBB, 0xBF };
      foundBom = memcmp(z, bom, 3)==0;
      if( foundBom ) {
        z += 3; n -= 3;
      }
    }
    if((n>1) && ('\r'==z[n-2])){
      if(n==2) continue /*empty line*/;
      z[n-2] = 0;
    }
    if (!foundBom) {
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
  g.argc = j;
  g.argv = newArgv;
}

/*
** This procedure runs first.
*/
#if defined(_WIN32) && defined(UNICODE) && !defined(MINGW_BROKEN_MAINARGS)
int wmain(int argc, wchar_t **argv)
#else
int main(int argc, char **argv)
#endif
{
  const char *zCmdName = "unknown";
  int idx;
  int rc;

  sqlite3_config(SQLITE_CONFIG_LOG, fossil_sqlite_log, 0);
  memset(&g, 0, sizeof(g));
  g.now = time(0);







<
<
<
|
<
<







550
551
552
553
554
555
556



557


558
559
560
561
562
563
564
  g.argc = j;
  g.argv = newArgv;
}

/*
** This procedure runs first.
*/



int main(int argc, char **argv){


  const char *zCmdName = "unknown";
  int idx;
  int rc;

  sqlite3_config(SQLITE_CONFIG_LOG, fossil_sqlite_log, 0);
  memset(&g, 0, sizeof(g));
  g.now = time(0);