Fossil

Check-in [82888a0d35]
Login

Check-in [82888a0d35]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add a new setting "regexp-limit" that determines the maximum size of a REGEXP virtual machine. Default value 1000.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 82888a0d35a80944460eff38b34a25d942e141b1778af1e56451d7029361660c
User & Date: drh 2025-09-26 20:07:06.903
Context
2025-09-27
11:10
Update the built-in SQLite to the latest trunk version so that it will compile without warnings on Windows. ... (check-in: 702a56d116 user: drh tags: trunk)
00:47
Use the datetime of the start of the branch as the input for the color hash. See suggestion made by Stephan Beal in [forum:/forumpost/a9a92d73c4a172f9|forum post a9a92d73c4a172f9]. ... (check-in: 1851b26d2b user: andybradford tags: datetime-color-hash)
2025-09-26
20:07
Add a new setting "regexp-limit" that determines the maximum size of a REGEXP virtual machine. Default value 1000. ... (check-in: 82888a0d35 user: drh tags: trunk)
14:23
Updates to the changelog. ... (check-in: 28483bfc5a user: danield tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/browse.c.
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
  }else{
    startExpanded = 0;
  }

  /* If a regular expression is specified, compile it */
  zRE = P("re");
  if( zRE ){
    re_compile(&pRE, zRE, 0);
    zREx = mprintf("&re=%T", zRE);
  }
  cgi_check_for_malice();

  /* If the name= parameter is an empty string, make it a NULL pointer */
  if( zD && strlen(zD)==0 ){ zD = 0; }








|







745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
  }else{
    startExpanded = 0;
  }

  /* If a regular expression is specified, compile it */
  zRE = P("re");
  if( zRE ){
    fossil_re_compile(&pRE, zRE, 0);
    zREx = mprintf("&re=%T", zRE);
  }
  cgi_check_for_malice();

  /* If the name= parameter is an empty string, make it a NULL pointer */
  if( zD && strlen(zD)==0 ){ zD = 0; }

Changes to src/diff.c.
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
    return;
  }
  find_option("i",0,0);
  find_option("v",0,0);
  diff_options(&DCfg, 0, 0);
  zRe = find_option("regexp","e",1);
  if( zRe ){
    const char *zErr = re_compile(&DCfg.pRe, zRe, 0);
    if( zErr ) fossil_fatal("regex error: %s", zErr);
  }
  verify_all_options();
  if( g.argc!=4 ) usage("FILE1 FILE2");
  blob_zero(&out);
  diff_begin(&DCfg);
  diff_print_filenames(g.argv[2], g.argv[3], &DCfg, &out);







|







3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
    return;
  }
  find_option("i",0,0);
  find_option("v",0,0);
  diff_options(&DCfg, 0, 0);
  zRe = find_option("regexp","e",1);
  if( zRe ){
    const char *zErr = fossil_re_compile(&DCfg.pRe, zRe, 0);
    if( zErr ) fossil_fatal("regex error: %s", zErr);
  }
  verify_all_options();
  if( g.argc!=4 ) usage("FILE1 FILE2");
  blob_zero(&out);
  diff_begin(&DCfg);
  diff_print_filenames(g.argv[2], g.argv[3], &DCfg, &out);
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
    return;
  }
  find_option("i",0,0);
  find_option("v",0,0);
  diff_options(&DCfg, 0, 0);
  zRe = find_option("regexp","e",1);
  if( zRe ){
    const char *zErr = re_compile(&DCfg.pRe, zRe, 0);
    if( zErr ) fossil_fatal("regex error: %s", zErr);
  }
  db_find_and_open_repository(0, 0);
  verify_all_options();
  if( g.argc!=4 ) usage("HASH1 HASH2");
  blob_zero(&out);
  diff_begin(&DCfg);







|







3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
    return;
  }
  find_option("i",0,0);
  find_option("v",0,0);
  diff_options(&DCfg, 0, 0);
  zRe = find_option("regexp","e",1);
  if( zRe ){
    const char *zErr = fossil_re_compile(&DCfg.pRe, zRe, 0);
    if( zErr ) fossil_fatal("regex error: %s", zErr);
  }
  db_find_and_open_repository(0, 0);
  verify_all_options();
  if( g.argc!=4 ) usage("HASH1 HASH2");
  blob_zero(&out);
  diff_begin(&DCfg);
Changes to src/dispatch.c.
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
  if( bAbbrevSubcmd ){
    zPattern = mprintf("   ([a-z]+ ?\\| ?)*%s\\b", zQSub);
  }else{
    zPattern = mprintf(">  ?fossil [-a-z]+ .*\\b%s\\b", zQSub);
  }
  fossil_free(zQTop);
  fossil_free(zQSub);
  re_compile(&pRe, zPattern, 0);
  fossil_free(zPattern);
  blob_init(&in, z, -1);
  while( blob_line(&in, &line) ){
    if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_size(&line)) ){
      int atStart = 1;
      blob_appendb(pOut, &line);
      n++;







|







1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
  if( bAbbrevSubcmd ){
    zPattern = mprintf("   ([a-z]+ ?\\| ?)*%s\\b", zQSub);
  }else{
    zPattern = mprintf(">  ?fossil [-a-z]+ .*\\b%s\\b", zQSub);
  }
  fossil_free(zQTop);
  fossil_free(zQSub);
  fossil_re_compile(&pRe, zPattern, 0);
  fossil_free(zPattern);
  blob_init(&in, z, -1);
  while( blob_line(&in, &line) ){
    if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_size(&line)) ){
      int atStart = 1;
      blob_appendb(pOut, &line);
      n++;
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
  int bAbbrevSubcmd         /* z[] uses abbreviated subcommands */
){
  ReCompiled *pRe = 0;
  Blob in, line;
  int n = 0;

  if( bAbbrevSubcmd ){
    re_compile(&pRe, "^(Usage: |   [a-z][-a-z|]+ .*)", 0);
  }else{
    re_compile(&pRe, "^(Usage: | *[Oo]r: +%fossi |>  ?fossil )", 0);
  }
  blob_init(&in, z, -1);
  while( blob_line(&in, &line) ){
    if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_strlen(&line)) ){
      simplify_usage_line(&line, pOut, bAbbrevSubcmd, zTopic);
      n++;
    }







|

|







1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
  int bAbbrevSubcmd         /* z[] uses abbreviated subcommands */
){
  ReCompiled *pRe = 0;
  Blob in, line;
  int n = 0;

  if( bAbbrevSubcmd ){
    fossil_re_compile(&pRe, "^(Usage: |   [a-z][-a-z|]+ .*)", 0);
  }else{
    fossil_re_compile(&pRe, "^(Usage: | *[Oo]r: +%fossi |>  ?fossil )", 0);
  }
  blob_init(&in, z, -1);
  while( blob_line(&in, &line) ){
    if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_strlen(&line)) ){
      simplify_usage_line(&line, pOut, bAbbrevSubcmd, zTopic);
      n++;
    }
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  ReCompiled *pRe = 0;
  Blob txt, line, subsection;
  int n = 0;
  int bSubsectionSeen = 0;

  blob_init(&txt, z, -1);
  blob_init(&subsection, 0, 0);
  re_compile(&pRe, "^ +-.*  ", 0);
  while( blob_line(&txt, &line) ){
    int len = blob_size(&line);
    unsigned char *zLine = (unsigned char *)blob_buffer(&line);
    if( re_match(pRe, zLine, len) ){
      if( blob_size(&subsection) ){
        simplify_usage_line(&subsection, pOut, bAbbrevSubcmd, zCmd);
        blob_reset(&subsection);







|







1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  ReCompiled *pRe = 0;
  Blob txt, line, subsection;
  int n = 0;
  int bSubsectionSeen = 0;

  blob_init(&txt, z, -1);
  blob_init(&subsection, 0, 0);
  fossil_re_compile(&pRe, "^ +-.*  ", 0);
  while( blob_line(&txt, &line) ){
    int len = blob_size(&line);
    unsigned char *zLine = (unsigned char *)blob_buffer(&line);
    if( re_match(pRe, zLine, len) ){
      if( blob_size(&subsection) ){
        simplify_usage_line(&subsection, pOut, bAbbrevSubcmd, zCmd);
        blob_reset(&subsection);
Changes to src/info.c.
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
  if( rid==0 ){
    style_header("Check-in Information Error");
    @ No such object: %h(zName)
    style_finish_page();
    return;
  }
  zRe = P("regex");
  if( zRe ) re_compile(&pRe, zRe, 0);
  zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
  zParent = db_text(0,
    "SELECT uuid FROM plink, blob"
    " WHERE plink.cid=%d AND blob.rid=plink.pid AND plink.isprim",
    rid
  );
  isLeaf = !db_exists("SELECT 1 FROM plink WHERE pid=%d", rid);







|







934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
  if( rid==0 ){
    style_header("Check-in Information Error");
    @ No such object: %h(zName)
    style_finish_page();
    return;
  }
  zRe = P("regex");
  if( zRe ) fossil_re_compile(&pRe, zRe, 0);
  zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
  zParent = db_text(0,
    "SELECT uuid FROM plink, blob"
    " WHERE plink.cid=%d AND blob.rid=plink.pid AND plink.isprim",
    rid
  );
  isLeaf = !db_exists("SELECT 1 FROM plink WHERE pid=%d", rid);
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
  if( robot_restrict("diff") ) return;
  login_anonymous_available();
  fossil_nice_default();
  blob_init(&qp, 0, 0);
  blob_init(&qpGlob, 0, 0);
  diffType = preferred_diff_type();
  zRe = P("regex");
  if( zRe ) re_compile(&pRe, zRe, 0);
  zBranch = P("branch");
  if( zBranch && zBranch[0]==0 ) zBranch = 0;
  if( zBranch ){
    blob_appendf(&qp, "branch=%T", zBranch);
    zMergeOrigin = mprintf("merge-in:%s", zBranch);
    cgi_replace_parameter("from", zMergeOrigin);
    cgi_replace_parameter("to", zBranch);







|







1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
  if( robot_restrict("diff") ) return;
  login_anonymous_available();
  fossil_nice_default();
  blob_init(&qp, 0, 0);
  blob_init(&qpGlob, 0, 0);
  diffType = preferred_diff_type();
  zRe = P("regex");
  if( zRe ) fossil_re_compile(&pRe, zRe, 0);
  zBranch = P("branch");
  if( zBranch && zBranch[0]==0 ) zBranch = 0;
  if( zBranch ){
    blob_appendf(&qp, "branch=%T", zBranch);
    zMergeOrigin = mprintf("merge-in:%s", zBranch);
    cgi_replace_parameter("from", zMergeOrigin);
    cgi_replace_parameter("to", zBranch);
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
        "%R/annotate?origin=%s&checkin=%s&filename=%T",
        zOrig, zCkin, zFN);
    }
    db_finalize(&q);
  }
  zRe = P("regex");
  cgi_check_for_malice();
  if( zRe ) re_compile(&pRe, zRe, 0);
  if( verbose ) objdescFlags |= OBJDESC_DETAIL;
  if( isPatch ){
    Blob c1, c2, *pOut;
    DiffConfig DCfg;
    pOut = cgi_output_blob();
    cgi_set_content_type("text/plain");
    DCfg.diffFlags = DIFF_VERBOSE;







|







2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
        "%R/annotate?origin=%s&checkin=%s&filename=%T",
        zOrig, zCkin, zFN);
    }
    db_finalize(&q);
  }
  zRe = P("regex");
  cgi_check_for_malice();
  if( zRe ) fossil_re_compile(&pRe, zRe, 0);
  if( verbose ) objdescFlags |= OBJDESC_DETAIL;
  if( isPatch ){
    Blob c1, c2, *pOut;
    DiffConfig DCfg;
    pOut = cgi_output_blob();
    cgi_set_content_type("text/plain");
    DCfg.diffFlags = DIFF_VERBOSE;
Changes to src/json.c.
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    /* When running in server/cgi "directory" mode, zPathInfo is
    ** prefixed with the repository's name, so in order to determine
    ** whether or not we're really running in json mode we have to try
    ** a bit harder. Problem reported here:
    ** https://fossil-scm.org/forum/forumpost/e4953666d6
    */
    ReCompiled * pReg = 0;
    const char * zErr = re_compile(&pReg, "^/[^/]+/json(/.*)?", 0);
    assert(zErr==0 && "Regex compilation failed?");
    if(zErr==0 &&
         re_match(pReg, (const unsigned char *)zPathInfo, -1)){
      rc = 2;
    }
    re_free(pReg);
  }







|







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    /* When running in server/cgi "directory" mode, zPathInfo is
    ** prefixed with the repository's name, so in order to determine
    ** whether or not we're really running in json mode we have to try
    ** a bit harder. Problem reported here:
    ** https://fossil-scm.org/forum/forumpost/e4953666d6
    */
    ReCompiled * pReg = 0;
    const char * zErr = fossil_re_compile(&pReg, "^/[^/]+/json(/.*)?", 0);
    assert(zErr==0 && "Regex compilation failed?");
    if(zErr==0 &&
         re_match(pReg, (const unsigned char *)zPathInfo, -1)){
      rc = 2;
    }
    re_free(pReg);
  }
Changes to src/match.c.
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
    zOne = fossil_strndup(zPat, i);
    zPat += i;
    if( zPat[0] ) zPat++;

    /* Check for regular expression syntax errors. */
    if( style==MS_REGEXP ){
      ReCompiled *regexp;
      const char *zFail = re_compile(&regexp, zOne, 0);
      if( zFail ){
        re_free(regexp);
        continue;
      }
      p->nPattern++;
      p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)*p->nPattern);
      p->aRe[p->nPattern-1] = regexp;







|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
    zOne = fossil_strndup(zPat, i);
    zPat += i;
    if( zPat[0] ) zPat++;

    /* Check for regular expression syntax errors. */
    if( style==MS_REGEXP ){
      ReCompiled *regexp;
      const char *zFail = fossil_re_compile(&regexp, zOne, 0);
      if( zFail ){
        re_free(regexp);
        continue;
      }
      p->nPattern++;
      p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)*p->nPattern);
      p->aRe[p->nPattern-1] = regexp;
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
      }
    }

    /* Check for regular expression syntax errors. */
    if( matchStyle==MS_REGEXP ){
      ReCompiled *regexp;
      char *zTagDup = fossil_strndup(zTag, i);
      zFail = re_compile(&regexp, zTagDup, 0);
      re_free(regexp);
      fossil_free(zTagDup);
    }

    /* Process success and error results. */
    if( !zFail ){
      /* Incorporate the match word into the output expression.  %q is used to







|







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
      }
    }

    /* Check for regular expression syntax errors. */
    if( matchStyle==MS_REGEXP ){
      ReCompiled *regexp;
      char *zTagDup = fossil_strndup(zTag, i);
      zFail = fossil_re_compile(&regexp, zTagDup, 0);
      re_free(regexp);
      fossil_free(zTagDup);
    }

    /* Process success and error results. */
    if( !zFail ){
      /* Incorporate the match word into the output expression.  %q is used to
Changes to src/regexp.c.
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
**
**
**  The following regular expression syntax is supported:
**
**     X*      zero or more occurrences of X
**     X+      one or more occurrences of X
**     X?      zero or one occurrences of X
**     X{p,q}  between p and q occurrences of X,    0 <= p,q <= 999
**     (X)     match X
**     X|Y     X or Y
**     ^X      X occurring at the beginning of the string
**     X$      X occurring at the end of the string
**     .       Match any single character
**     \c      Character c where c is one of \{}()[]|*+?.
**     \c      C-language escapes for c in afnrtv.  ex: \t or \n







|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
**
**
**  The following regular expression syntax is supported:
**
**     X*      zero or more occurrences of X
**     X+      one or more occurrences of X
**     X?      zero or one occurrences of X
**     X{p,q}  between p and q occurrences of X
**     (X)     match X
**     X|Y     X or Y
**     ^X      X occurring at the beginning of the string
**     X$      X occurring at the end of the string
**     .       Match any single character
**     \c      Character c where c is one of \{}()[]|*+?.
**     \c      C-language escapes for c in afnrtv.  ex: \t or \n
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
** A nondeterministic finite automaton (NFA) is used for matching, so the
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.
**
** To help prevent DoS attacks, the values of p and q in the "{p,q}" syntax
** are limited to SQLITE_MAX_REGEXP_REPEAT, default 999.
*/
#include "config.h"
#include "regexp.h"

#ifndef SQLITE_MAX_REGEXP_REPEAT
# define SQLITE_MAX_REGEXP_REPEAT 999
#endif







<
<
<







51
52
53
54
55
56
57



58
59
60
61
62
63
64
** A nondeterministic finite automaton (NFA) is used for matching, so the
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.



*/
#include "config.h"
#include "regexp.h"

#ifndef SQLITE_MAX_REGEXP_REPEAT
# define SQLITE_MAX_REGEXP_REPEAT 999
#endif
123
124
125
126
127
128
129

130
131
132
133
134
135
136
  char *aOp;                  /* Operators for the virtual machine */
  int *aArg;                  /* Arguments to each operator */
  unsigned (*xNextChar)(ReInput*);  /* Next character function */
  unsigned char zInit[12];    /* Initial text to match */
  int nInit;                  /* Number of characters in zInit */
  unsigned nState;            /* Number of entries in aOp[] and aArg[] */
  unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */

};
#endif

/* Add a state to the given state set if it is not already there */
static void re_add_state(ReStateSet *pSet, int newState){
  unsigned i;
  for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;







>







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
  char *aOp;                  /* Operators for the virtual machine */
  int *aArg;                  /* Arguments to each operator */
  unsigned (*xNextChar)(ReInput*);  /* Next character function */
  unsigned char zInit[12];    /* Initial text to match */
  int nInit;                  /* Number of characters in zInit */
  unsigned nState;            /* Number of entries in aOp[] and aArg[] */
  unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */
  unsigned mxAlloc;           /* Complexity limit */
};
#endif

/* Add a state to the given state set if it is not already there */
static void re_add_state(ReStateSet *pSet, int newState){
  unsigned i;
  for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;
339
340
341
342
343
344
345

346
347
348
349
350
351
352
353
354
355
356
357
}

/* Grow (or shrink) the parallel aOp[] and aArg[] arrays of an RE that is
** still being built so that each holds N slots.
**
** Returns 0 on success.  Returns 1 if either reallocation yields a NULL
** pointer; in that case p->nAlloc is left unchanged.
*/
static int re_resize(ReCompiled *p, int N){
  char *aNewOp;
  int *aNewArg;

  /* Operator array first; only commit the pointer once it is known good. */
  aNewOp = fossil_realloc(p->aOp, N*sizeof(*p->aOp));
  if( !aNewOp ){
    return 1;
  }
  p->aOp = aNewOp;

  /* Then the matching argument array. */
  aNewArg = fossil_realloc(p->aArg, N*sizeof(*p->aArg));
  if( !aNewArg ){
    return 1;
  }
  p->aArg = aNewArg;

  p->nAlloc = N;
  return 0;
}

/* Insert a new opcode and argument into an RE under construction.  The
** insertion point is just prior to existing opcode iBefore.







>

|


|







337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
}

/* Resize the parallel aOp[] and aArg[] arrays of an RE that is still being
** built so that each holds N slots.
**
** Returns 0 on success.  Returns 1 on failure, after storing a static
** error message in p->zErr:
**
**    "REGEXP pattern too big"   N exceeds the complexity limit p->mxAlloc
**    "out of memory"            a reallocation returned NULL
**
** On failure p->nAlloc is left unchanged.
*/
static int re_resize(ReCompiled *p, int N){
  char *aNewOp;
  int *aNewArg;

  /* Enforce the complexity limit before touching any memory. */
  if( N>p->mxAlloc ){
    p->zErr = "REGEXP pattern too big";
    return 1;
  }

  /* Operator array first; only commit the pointer once it is known good. */
  aNewOp = fossil_realloc(p->aOp, N*sizeof(*p->aOp));
  if( !aNewOp ){
    p->zErr = "out of memory";
    return 1;
  }
  p->aOp = aNewOp;

  /* Then the matching argument array. */
  aNewArg = fossil_realloc(p->aArg, N*sizeof(*p->aArg));
  if( !aNewArg ){
    p->zErr = "out of memory";
    return 1;
  }
  p->aArg = aNewArg;

  p->nAlloc = N;
  return 0;
}

/* Insert a new opcode and argument into an RE under construction.  The
** insertion point is just prior to existing opcode iBefore.
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
      }
      case '{': {
        unsigned int m = 0, n = 0;
        unsigned int sz, j;
        if( iPrev<0 ) return "'{m,n}' without operand";
        while( (c=rePeek(p))>='0' && c<='9' ){
          m = m*10 + c - '0';
          if( m>SQLITE_MAX_REGEXP_REPEAT ) return "integer too large";
          p->sIn.i++;
        }
        n = m;
        if( c==',' ){
          p->sIn.i++;
          n = 0;
          while( (c=rePeek(p))>='0' && c<='9' ){
            n = n*10 + c-'0';
            if( n>SQLITE_MAX_REGEXP_REPEAT ) return "integer too large";
            p->sIn.i++;
          }
        }
        if( c!='}' ) return "unmatched '{'";
        if( n>0 && n<m ) return "n less than m in '{m,n}'";
        p->sIn.i++;
        sz = p->nState - iPrev;
        if( m==0 ){
          if( n==0 ) return "both m and n are zero in '{m,n}'";
          re_insert(p, iPrev, RE_OP_FORK, sz+1);
          iPrev++;
          n--;
        }else{
          for(j=1; j<m; j++) re_copy(p, iPrev, sz);
        }
        for(j=m; j<n; j++){
          re_append(p, RE_OP_FORK, sz+1);
          re_copy(p, iPrev, sz);
        }
        if( n==0 && m>0 ){
          re_append(p, RE_OP_FORK, -sz);
        }
        break;
      }
      case '[': {
        unsigned int iFirst = p->nState;
        if( rePeek(p)=='^' ){
          re_append(p, RE_OP_CC_EXC, 0);







|








|




















|







531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
      }
      case '{': {
        unsigned int m = 0, n = 0;
        unsigned int sz, j;
        if( iPrev<0 ) return "'{m,n}' without operand";
        while( (c=rePeek(p))>='0' && c<='9' ){
          m = m*10 + c - '0';
          if( m*2>p->mxAlloc ) return "REGEXP pattern too big";
          p->sIn.i++;
        }
        n = m;
        if( c==',' ){
          p->sIn.i++;
          n = 0;
          while( (c=rePeek(p))>='0' && c<='9' ){
            n = n*10 + c-'0';
            if( n*2>p->mxAlloc ) return "REGEXP pattern too big";
            p->sIn.i++;
          }
        }
        if( c!='}' ) return "unmatched '{'";
        if( n>0 && n<m ) return "n less than m in '{m,n}'";
        p->sIn.i++;
        sz = p->nState - iPrev;
        if( m==0 ){
          if( n==0 ) return "both m and n are zero in '{m,n}'";
          re_insert(p, iPrev, RE_OP_FORK, sz+1);
          iPrev++;
          n--;
        }else{
          for(j=1; j<m; j++) re_copy(p, iPrev, sz);
        }
        for(j=m; j<n; j++){
          re_append(p, RE_OP_FORK, sz+1);
          re_copy(p, iPrev, sz);
        }
        if( n==0 && m>0 ){
          re_append(p, RE_OP_FORK, -(int)sz);
        }
        break;
      }
      case '[': {
        unsigned int iFirst = p->nState;
        if( rePeek(p)=='^' ){
          re_append(p, RE_OP_CC_EXC, 0);
642
643
644
645
646
647
648
649





650
651
652
653
654
655
656
657
658
659
660

661

662
663
664
665
666
667
668
669
670

/*
** Compile a textual regular expression in zIn[] into a compiled regular
** expression suitable for use by re_match() and return a pointer to the
** compiled regular expression in *ppRe.  Return NULL on success or an
** error message if something goes wrong.
*/
const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){





  ReCompiled *pRe;
  const char *zErr;
  int i, j;

  *ppRe = 0;
  pRe = fossil_malloc( sizeof(*pRe) );
  if( pRe==0 ){
    return "out of memory";
  }
  memset(pRe, 0, sizeof(*pRe));
  pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char;

  if( re_resize(pRe, 30) ){

    re_free(pRe);
    return "out of memory";
  }
  if( zIn[0]=='^' ){
    zIn++;
  }else{
    re_append(pRe, RE_OP_ANYSTAR, 0);
  }
  pRe->sIn.z = (unsigned char*)zIn;







|
>
>
>
>
>











>

>

|







641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676

/*
** Compile a textual regular expression in zIn[] into a compiled regular
** expression suitable for use by re_match() and return a pointer to the
** compiled regular expression in *ppRe.  Return NULL on success or an
** error message if something goes wrong.
*/
const char *re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int mxRe,               /* Complexity limit */
  int noCase              /* True for caseless comparisons */
){
  ReCompiled *pRe;
  const char *zErr;
  int i, j;

  *ppRe = 0;
  pRe = fossil_malloc( sizeof(*pRe) );
  if( pRe==0 ){
    return "out of memory";
  }
  memset(pRe, 0, sizeof(*pRe));
  pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char;
  pRe->mxAlloc = mxRe;
  if( re_resize(pRe, 30) ){
    zErr = pRe->zErr;
    re_free(pRe);
    return zErr;
  }
  if( zIn[0]=='^' ){
    zIn++;
  }else{
    re_append(pRe, RE_OP_ANYSTAR, 0);
  }
  pRe->sIn.z = (unsigned char*)zIn;
747
748
749
750
751
752
753


























754
755
756
757
758
759
760
      }
    }
    zIn++;
  }
  blob_materialize(&out);
  return out.aData;
}



























/*
** Implementation of the regexp() SQL function.  This function implements
** the built-in REGEXP operator.  The first argument to the function is the
** pattern and the second argument is the string.  So, the SQL statements:
**
**       A REGEXP B







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
      }
    }
    zIn++;
  }
  blob_materialize(&out);
  return out.aData;
}

/*
** SETTING:  regexp-limit                  width=8 default=1000
**
** Limit the size of the bytecode used to implement a regular expression
** to this many steps.  It is important to limit this to avoid possible
** DoS attacks.
*/

/*
** Return the size limit to apply to a compiled REGEXP NFA.
**
** When g.db is non-zero the value comes from the "regexp-limit" setting,
** with 1000 as the fallback.  When g.db is zero the answer is always 1000.
*/
int re_maxlen(void){
  if( !g.db ){
    return 1000;
  }
  return db_get_int("regexp-limit", 1000);
}

/*
** Convenience wrapper around re_compile() that supplies re_maxlen() as
** the complexity limit.  The return value is whatever re_compile()
** returns.
*/
const char *fossil_re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int noCase              /* True for caseless comparisons */
){
  const int mxRe = re_maxlen();   /* Complexity limit for this compile */
  return re_compile(ppRe, zIn, mxRe, noCase);
}

/*
** Implementation of the regexp() SQL function.  This function implements
** the built-in REGEXP operator.  The first argument to the function is the
** pattern and the second argument is the string.  So, the SQL statements:
**
**       A REGEXP B
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
  int setAux = 0;           /* True to invoke sqlite3_set_auxdata() */

  (void)argc;  /* Unused */
  pRe = sqlite3_get_auxdata(context, 0);
  if( pRe==0 ){
    zPattern = (const char*)sqlite3_value_text(argv[0]);
    if( zPattern==0 ) return;
    zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
    if( zErr ){
      re_free(pRe);
      sqlite3_result_int(context, 0);
      /* sqlite3_result_error(context, zErr, -1); */
      return;
    }
    if( pRe==0 ){







|







805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
  int setAux = 0;           /* True to invoke sqlite3_set_auxdata() */

  (void)argc;  /* Unused */
  pRe = sqlite3_get_auxdata(context, 0);
  if( pRe==0 ){
    zPattern = (const char*)sqlite3_value_text(argv[0]);
    if( zPattern==0 ) return;
    zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
    if( zErr ){
      re_free(pRe);
      sqlite3_result_int(context, 0);
      /* sqlite3_result_error(context, zErr, -1); */
      return;
    }
    if( pRe==0 ){
801
802
803
804
805
806
807
808

809
810
811
812
813

814
815
816
817
818
819
820

/*
** Register the two-argument SQL functions regexp() and regexpi() on the
** database connection db.  Both are implemented by re_sql_func().
**
** regexpi(PATTERN,STRING) is the case-insensitive variant of
** regexp(PATTERN,STRING).  The two registrations differ only in the
** user-data pointer: 0 for regexp() and db for regexpi().
**
** Returns SQLITE_OK if both registrations succeed, otherwise the result
** code of the first registration that failed.
*/
int re_add_sql_func(sqlite3 *db){
  const int mFlags = SQLITE_UTF8|SQLITE_INNOCUOUS;
  int rc;

  rc = sqlite3_create_function(db, "regexp", 2, mFlags,
                               0, re_sql_func, 0, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  return sqlite3_create_function(db, "regexpi", 2, mFlags,
                                 (void*)db, re_sql_func, 0, 0);
}

/*
** Run a "grep" over a single file read from disk.







|
>




|
>







833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854

/*
** Register the two-argument SQL functions regexp() and regexpi() on the
** database connection db.  Both are implemented by re_sql_func() and are
** registered as UTF8, innocuous and deterministic.
**
** regexpi(PATTERN,STRING) is the case-insensitive variant of
** regexp(PATTERN,STRING).  The two registrations differ only in the
** user-data pointer: 0 for regexp() and db for regexpi().
**
** Returns SQLITE_OK if both registrations succeed, otherwise the result
** code of the first registration that failed.
*/
int re_add_sql_func(sqlite3 *db){
  const int mFlags = SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC;
  int rc;

  rc = sqlite3_create_function(db, "regexp", 2, mFlags,
                               0, re_sql_func, 0, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  return sqlite3_create_function(db, "regexpi", 2, mFlags,
                                 (void*)db, re_sql_func, 0, 0);
}

/*
** Run a "grep" over a single file read from disk.
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
  int ignoreCase = find_option("ignore-case","i",0)!=0;
  int bRobot = find_option("robot-exception",0,0)!=0;
  if( bRobot ){
    const char *zRe;
    db_find_and_open_repository(0,0);
    verify_all_options();
    zRe = db_get("robot-exception","^$");
    zErr = re_compile(&pRe, zRe, ignoreCase);
    iFileList = 2;
  }else{
    verify_all_options();
    if( g.argc<3 ){
      usage("REGEXP [FILE...]");
    }
    zErr = re_compile(&pRe, g.argv[2], ignoreCase);
  }
  if( zErr ) fossil_fatal("%s", zErr);
  if( g.argc==iFileList ){
    grep_file(pRe, "-", stdin);
  }else{
    int i;
    for(i=iFileList; i<g.argc; i++){







|






|







923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
  int ignoreCase = find_option("ignore-case","i",0)!=0;
  int bRobot = find_option("robot-exception",0,0)!=0;
  if( bRobot ){
    const char *zRe;
    db_find_and_open_repository(0,0);
    verify_all_options();
    zRe = db_get("robot-exception","^$");
    zErr = fossil_re_compile(&pRe, zRe, ignoreCase);
    iFileList = 2;
  }else{
    verify_all_options();
    if( g.argc<3 ){
      usage("REGEXP [FILE...]");
    }
    zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase);
  }
  if( zErr ) fossil_fatal("%s", zErr);
  if( g.argc==iFileList ){
    grep_file(pRe, "-", stdin);
  }else{
    int i;
    for(i=iFileList; i<g.argc; i++){
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
    flags |= GREP_QUIET|GREP_EXISTS;
  }
  db_find_and_open_repository(0, 0);
  verify_all_options();
  if( g.argc<4 ){
    usage("REGEXP FILENAME ...");
  }
  zErr = re_compile(&pRe, g.argv[2], ignoreCase);
  if( zErr ) fossil_fatal("%s", zErr);

  add_content_sql_commands(g.db);
  db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);");
  for(ii=3; ii<g.argc; ii++){
    const char *zTarget = g.argv[ii];
    if( file_tree_name(zTarget, &fullName, 0, 1) ){







|







1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
    flags |= GREP_QUIET|GREP_EXISTS;
  }
  db_find_and_open_repository(0, 0);
  verify_all_options();
  if( g.argc<4 ){
    usage("REGEXP FILENAME ...");
  }
  zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase);
  if( zErr ) fossil_fatal("%s", zErr);

  add_content_sql_commands(g.db);
  db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);");
  for(ii=3; ii<g.argc; ii++){
    const char *zTarget = g.argv[ii];
    if( file_tree_name(zTarget, &fullName, 0, 1) ){
Changes to src/robot.c.
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
        continue;
      }
    }else{
      n = strlen(zRE);
    }
    z = mprintf("%.*s", (int)(zNL - zRE)+1, zRE);
    zRE += n;
    zErr = re_compile(&pRe, z, 0);
    if( zErr ){
      fossil_warning("robot-exception error \"%s\" in expression \"%s\"\n",
                     zErr, z);
      fossil_free(z);
      continue;
    }
    fossil_free(z);







|







365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
        continue;
      }
    }else{
      n = strlen(zRE);
    }
    z = mprintf("%.*s", (int)(zNL - zRE)+1, zRE);
    zRE += n;
    zErr = fossil_re_compile(&pRe, z, 0);
    if( zErr ){
      fossil_warning("robot-exception error \"%s\" in expression \"%s\"\n",
                     zErr, z);
      fossil_free(z);
      continue;
    }
    fossil_free(z);
Changes to src/th_main.c.
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
  if( fossil_strcmp(argv[nArg], "-nocase")==0 ){
    noCase = 1; nArg++;
  }
  if( fossil_strcmp(argv[nArg], "--")==0 ) nArg++;
  if( nArg+2!=argc ){
    return Th_WrongNumArgs(interp, REGEXP_WRONGNUMARGS);
  }
  zErr = re_compile(&pRe, argv[nArg], noCase);
  if( !zErr ){
    Th_SetResultInt(interp, re_match(pRe,
        (const unsigned char *)argv[nArg+1], TH1_LEN(argl[nArg+1])));
    rc = TH_OK;
  }else{
    Th_SetResult(interp, zErr, -1);
    rc = TH_ERROR;







|







2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
  if( fossil_strcmp(argv[nArg], "-nocase")==0 ){
    noCase = 1; nArg++;
  }
  if( fossil_strcmp(argv[nArg], "--")==0 ) nArg++;
  if( nArg+2!=argc ){
    return Th_WrongNumArgs(interp, REGEXP_WRONGNUMARGS);
  }
  zErr = fossil_re_compile(&pRe, argv[nArg], noCase);
  if( !zErr ){
    Th_SetResultInt(interp, re_match(pRe,
        (const unsigned char *)argv[nArg+1], TH1_LEN(argl[nArg+1])));
    rc = TH_OK;
  }else{
    Th_SetResult(interp, zErr, -1);
    rc = TH_ERROR;
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
  url_parse_local(argv[nArg], 0, &urlData);
  if( urlData.isSsh || urlData.isFile ){
    Th_ErrorMessage(interp, "url must be http:// or https://", 0, 0);
    return TH_ERROR;
  }
  zRegexp = db_get("th1-uri-regexp", 0);
  if( zRegexp && zRegexp[0] ){
    const char *zErr = re_compile(&pRe, zRegexp, 0);
    if( zErr ){
      Th_SetResult(interp, zErr, -1);
      return TH_ERROR;
    }
  }
  if( !pRe || !re_match(pRe, (const unsigned char *)urlData.canonical, -1) ){
    Th_SetResult(interp, "url not allowed", -1);







|







2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
  url_parse_local(argv[nArg], 0, &urlData);
  if( urlData.isSsh || urlData.isFile ){
    Th_ErrorMessage(interp, "url must be http:// or https://", 0, 0);
    return TH_ERROR;
  }
  zRegexp = db_get("th1-uri-regexp", 0);
  if( zRegexp && zRegexp[0] ){
    const char *zErr = fossil_re_compile(&pRe, zRegexp, 0);
    if( zErr ){
      Th_SetResult(interp, zErr, -1);
      return TH_ERROR;
    }
  }
  if( !pRe || !re_match(pRe, (const unsigned char *)urlData.canonical, -1) ){
    Th_SetResult(interp, "url not allowed", -1);