Fossil

Check-in [a18dab4184]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further reduce divergence between the SQLite and Fossil implementations of regexp.c. Fix compiler warnings for MSVC.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: a18dab4184501635bcf038e5370c060a09800aa1bb0848c5e37188f5df39235c
User & Date: drh 2025-09-27 11:57:33.491
Context
2025-09-27
15:05
Avoid an unnecessary mprintf(). check-in: bd4cec1240 user: danield tags: trunk
11:57
Further reduce divergence between the SQLite and Fossil implementations of regexp.c. Fix compiler warnings for MSVC. check-in: a18dab4184 user: drh tags: trunk
11:10
Update the built-in SQLite to the latest trunk version so that it will compile without warnings on Windows. check-in: 702a56d116 user: drh tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to extsrc/shell.c.
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
7037
7038
7039
7040
7041
7042
7043
7044
7045
7046
7047
7048
7049
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.
**
** To help prevent DoS attacks, the size of the NFA is limited to
** SQLITE_MAX_REGEXP states, default 9999.
*/
#include <string.h>
#include <stdlib.h>
/* #include "sqlite3ext.h" */
SQLITE_EXTENSION_INIT1

#ifndef SQLITE_MAX_REGEXP
# define SQLITE_MAX_REGEXP 9999
#endif

/*
** The following #defines change the names of some functions implemented in
** this file to prevent name collisions with C-library functions of the
** same name.
*/
#define re_match   sqlite3re_match
#define re_compile sqlite3re_compile







|
<






<
<
<
<







7024
7025
7026
7027
7028
7029
7030
7031

7032
7033
7034
7035
7036
7037




7038
7039
7040
7041
7042
7043
7044
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.
**
** To help prevent DoS attacks, the maximum size of the NFA is restricted.

*/
#include <string.h>
#include <stdlib.h>
/* #include "sqlite3ext.h" */
SQLITE_EXTENSION_INIT1





/*
** The following #defines change the names of some functions implemented in
** this file to prevent name collisions with C-library functions of the
** same name.
*/
#define re_match   sqlite3re_match
#define re_compile sqlite3re_compile
7070
7071
7072
7073
7074
7075
7076
7077
7078
7079
7080
7081
7082
7083
7084
7085
7086
7087
7088
7089
7090
7091
7092
7093
7094
7095
7096
7097
7098
7099
7100
7101
7102
7103
7104
7105
7106
7107
7108
7109
#define RE_OP_NOTWORD    12    /* Not a perl word character */
#define RE_OP_DIGIT      13    /* digit:  [0-9] */
#define RE_OP_NOTDIGIT   14    /* Not a digit */
#define RE_OP_SPACE      15    /* space:  [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE   16    /* Not a space */
#define RE_OP_BOUNDARY   17    /* Boundary between word and non-word */
#define RE_OP_ATSTART    18    /* Currently at the start of the string */

#if defined(SQLITE_DEBUG)
/* Opcode names used for symbolic debugging.
**
** The entries are listed in opcode-number order, so ReOpName[RE_OP_xxx]
** is the printable name of opcode RE_OP_xxx (for example, slot 12 holds
** "NOTWORD" and slot 18 holds "ATSTART").  This table must be kept in
** sync with the RE_OP_ definitions.  It is only compiled when
** SQLITE_DEBUG is defined.
*/
static const char *ReOpName[] = {
  "EOF",
  "MATCH",
  "ANY",
  "ANYSTAR",
  "FORK",
  "GOTO",
  "ACCEPT",
  "CC_INC",
  "CC_EXC",
  "CC_VALUE",
  "CC_RANGE",
  "WORD",
  "NOTWORD",
  "DIGIT",
  "NOTDIGIT",
  "SPACE",
  "NOTSPACE",
  "BOUNDARY",
  "ATSTART",
};
#endif /* SQLITE_DEBUG */


/* Each opcode is a "state" in the NFA */
typedef unsigned short ReStateNumber;

/* Because this is an NFA and not a DFA, multiple states can be active at
** once.  An instance of the following object records all active states in
** the NFA.  The implementation is optimized for the common case where the







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







7065
7066
7067
7068
7069
7070
7071


























7072
7073
7074
7075
7076
7077
7078
#define RE_OP_NOTWORD    12    /* Not a perl word character */
#define RE_OP_DIGIT      13    /* digit:  [0-9] */
#define RE_OP_NOTDIGIT   14    /* Not a digit */
#define RE_OP_SPACE      15    /* space:  [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE   16    /* Not a space */
#define RE_OP_BOUNDARY   17    /* Boundary between word and non-word */
#define RE_OP_ATSTART    18    /* Currently at the start of the string */



























/* Each opcode is a "state" in the NFA */
typedef unsigned short ReStateNumber;

/* Because this is an NFA and not a DFA, multiple states can be active at
** once.  An instance of the following object records all active states in
** the NFA.  The implementation is optimized for the common case where the
7348
7349
7350
7351
7352
7353
7354
7355
7356
7357
7358
7359
7360
7361
7362
re_match_end:
  sqlite3_free(pToFree);
  return rc;
}

/* Resize the opcode and argument arrays for an RE under construction.
*/
static int re_resize(ReCompiled *p, int N){
  char *aOp;
  int *aArg;
  if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; }
  aOp = sqlite3_realloc64(p->aOp, N*sizeof(p->aOp[0]));
  if( aOp==0 ){ p->zErr = "out of memory"; return 1; }
  p->aOp = aOp;
  aArg = sqlite3_realloc64(p->aArg, N*sizeof(p->aArg[0]));







|







7317
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
re_match_end:
  sqlite3_free(pToFree);
  return rc;
}

/* Resize the opcode and argument arrays for an RE under construction.
*/
static int re_resize(ReCompiled *p, unsigned int N){
  char *aOp;
  int *aArg;
  if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; }
  aOp = sqlite3_realloc64(p->aOp, N*sizeof(p->aOp[0]));
  if( aOp==0 ){ p->zErr = "out of memory"; return 1; }
  p->aOp = aOp;
  aArg = sqlite3_realloc64(p->aArg, N*sizeof(p->aArg[0]));
7387
7388
7389
7390
7391
7392
7393
7394
7395
7396
7397
7398
7399
7400
7401
static int re_append(ReCompiled *p, int op, int arg){
  /* Appending is the same as inserting at the slot just past the last
  ** state currently in use. */
  int rc = re_insert(p, p->nState, op, arg);
  return rc;
}

/* Duplicate the N opcodes (and their arguments) that begin at index
** iStart, placing the copies at the end of the RE under construction.
** The arrays are enlarged first when there is not enough room; if
** re_resize() reports an error, nothing is copied.
*/
static void re_copy(ReCompiled *p, int iStart, int N){
  if( p->nState+N>=p->nAlloc ){
    if( re_resize(p, p->nAlloc*2+N) ) return;
  }
  memcpy(p->aOp + p->nState, p->aOp + iStart, N*sizeof(p->aOp[0]));
  memcpy(p->aArg + p->nState, p->aArg + iStart, N*sizeof(p->aArg[0]));
  p->nState += N;
}

/* Return true if c is a hexadecimal digit character:  [0-9a-fA-F]







|







7356
7357
7358
7359
7360
7361
7362
7363
7364
7365
7366
7367
7368
7369
7370
static int re_append(ReCompiled *p, int op, int arg){
  /* Appending is the same as inserting at the slot just past the last
  ** state currently in use. */
  int rc = re_insert(p, p->nState, op, arg);
  return rc;
}

/* Duplicate the N opcodes (and their arguments) that begin at index
** iStart, placing the copies at the end of the RE under construction.
** The arrays are enlarged first when there is not enough room; if
** re_resize() reports an error, nothing is copied.
*/
static void re_copy(ReCompiled *p, int iStart, unsigned int N){
  if( p->nState+N>=p->nAlloc ){
    if( re_resize(p, p->nAlloc*2+N) ) return;
  }
  memcpy(p->aOp + p->nState, p->aOp + iStart, N*sizeof(p->aOp[0]));
  memcpy(p->aArg + p->nState, p->aArg + iStart, N*sizeof(p->aArg[0]));
  p->nState += N;
}

/* Return true if c is a hexadecimal digit character:  [0-9a-fA-F]
7641
7642
7643
7644
7645
7646
7647
7648
7649
7650
7651
7652
7653
7654
7655
7656
  return 0;
}

/* Release a compiled regular expression: its opcode array, its argument
** array, and the ReCompiled object itself.  Passing a NULL pointer is a
** harmless no-op.  Applications should invoke this routine once for
** every call to re_compile() to avoid memory leaks.
*/
static void re_free(void *p){
  ReCompiled *pRe;
  if( p==0 ) return;
  pRe = (ReCompiled*)p;
  sqlite3_free(pRe->aOp);
  sqlite3_free(pRe->aArg);
  sqlite3_free(pRe);
}








|
<







7610
7611
7612
7613
7614
7615
7616
7617

7618
7619
7620
7621
7622
7623
7624
  return 0;
}

/* Release a compiled regular expression: its opcode array, its argument
** array, and the ReCompiled object itself.  Passing a NULL pointer is a
** harmless no-op.  Applications should invoke this routine once for
** every call to re_compile() to avoid memory leaks.
*/
static void re_free(ReCompiled *pRe){
  if( pRe==0 ) return;
  sqlite3_free(pRe->aOp);
  sqlite3_free(pRe->aArg);
  sqlite3_free(pRe);
}

7806
7807
7808
7809
7810
7811
7812





















7813
7814
7815
7816
7817
7818
7819
  const char *zErr;
  ReCompiled *pRe;
  sqlite3_str *pStr;
  int i;
  int n;
  char *z;
  (void)argc;






















  zPattern = (const char*)sqlite3_value_text(argv[0]);
  if( zPattern==0 ) return;
  zErr = re_compile(&pRe, zPattern, re_maxlen(context),
                    sqlite3_user_data(context)!=0);
  if( zErr ){
    re_free(pRe);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







7774
7775
7776
7777
7778
7779
7780
7781
7782
7783
7784
7785
7786
7787
7788
7789
7790
7791
7792
7793
7794
7795
7796
7797
7798
7799
7800
7801
7802
7803
7804
7805
7806
7807
7808
  const char *zErr;
  ReCompiled *pRe;
  sqlite3_str *pStr;
  int i;
  int n;
  char *z;
  (void)argc;
  static const char *ReOpName[] = {
    "EOF",
    "MATCH",
    "ANY",
    "ANYSTAR",
    "FORK",
    "GOTO",
    "ACCEPT",
    "CC_INC",
    "CC_EXC",
    "CC_VALUE",
    "CC_RANGE",
    "WORD",
    "NOTWORD",
    "DIGIT",
    "NOTDIGIT",
    "SPACE",
    "NOTSPACE",
    "BOUNDARY",
    "ATSTART",
  };

  zPattern = (const char*)sqlite3_value_text(argv[0]);
  if( zPattern==0 ) return;
  zErr = re_compile(&pRe, zPattern, re_maxlen(context),
                    sqlite3_user_data(context)!=0);
  if( zErr ){
    re_free(pRe);
Changes to extsrc/sqlite3.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** the text of this file.  Search for "Begin file sqlite3.h" to find the start
** of the embedded sqlite3.h header file.) Additional code files may be needed
** if you want a wrapper to interface SQLite with your choice of programming
** language. The code for the "sqlite3" command-line shell is also in a
** separate file. This file contains only code for the core SQLite library.
**
** The content in this amalgamation comes from Fossil check-in
** 869c968569b09d05a5b7d587d8fddb3b4611 with changes in files:
**
**    
*/
#ifndef SQLITE_AMALGAMATION
#define SQLITE_CORE 1
#define SQLITE_AMALGAMATION 1
#ifndef SQLITE_PRIVATE







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** the text of this file.  Search for "Begin file sqlite3.h" to find the start
** of the embedded sqlite3.h header file.) Additional code files may be needed
** if you want a wrapper to interface SQLite with your choice of programming
** language. The code for the "sqlite3" command-line shell is also in a
** separate file. This file contains only code for the core SQLite library.
**
** The content in this amalgamation comes from Fossil check-in
** 2b34b750b5528b6dda195bc1a3895dc3fe46 with changes in files:
**
**    
*/
#ifndef SQLITE_AMALGAMATION
#define SQLITE_CORE 1
#define SQLITE_AMALGAMATION 1
#ifndef SQLITE_PRIVATE
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
**
** See also: [sqlite3_libversion()],
** [sqlite3_libversion_number()], [sqlite3_sourceid()],
** [sqlite_version()] and [sqlite_source_id()].
*/
#define SQLITE_VERSION        "3.51.0"
#define SQLITE_VERSION_NUMBER 3051000
#define SQLITE_SOURCE_ID      "2025-09-26 15:38:52 869c968569b09d05a5b7d587d8fddb3b4611daf7467dc157701e5dc6c960alt1"
#define SQLITE_SCM_BRANCH     "trunk"
#define SQLITE_SCM_TAGS       ""
#define SQLITE_SCM_DATETIME   "2025-09-26T15:38:52.279Z"

/*
** CAPI3REF: Run-Time Library Version Numbers
** KEYWORDS: sqlite3_version sqlite3_sourceid
**
** These interfaces provide the same information as the [SQLITE_VERSION],
** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros







|


|







465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
**
** See also: [sqlite3_libversion()],
** [sqlite3_libversion_number()], [sqlite3_sourceid()],
** [sqlite_version()] and [sqlite_source_id()].
*/
#define SQLITE_VERSION        "3.51.0"
#define SQLITE_VERSION_NUMBER 3051000
#define SQLITE_SOURCE_ID      "2025-09-27 11:54:49 2b34b750b5528b6dda195bc1a3895dc3fe46e70cbf992a78111316e2726c1ade"
#define SQLITE_SCM_BRANCH     "trunk"
#define SQLITE_SCM_TAGS       ""
#define SQLITE_SCM_DATETIME   "2025-09-27T11:54:49.147Z"

/*
** CAPI3REF: Run-Time Library Version Numbers
** KEYWORDS: sqlite3_version sqlite3_sourceid
**
** These interfaces provide the same information as the [SQLITE_VERSION],
** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros
258704
258705
258706
258707
258708
258709
258710
258711
258712
258713
258714
258715
258716
258717
258718
/* SQL function taking zero arguments that returns a fixed text string
** identifying the FTS5 source version.
*/
static void fts5SourceIdFunc(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apUnused        /* Function arguments */
){
  const char *zSourceId = "fts5: 2025-09-26 11:47:13 d022ee167b90a7c32049a93d476e869270018017f60551185024409730d77640";
  assert( nArg==0 );
  UNUSED_PARAM2(nArg, apUnused);
  sqlite3_result_text(pCtx, zSourceId, -1, SQLITE_TRANSIENT);
}

/*
** Implementation of fts5_locale(LOCALE, TEXT) function.
**
** If parameter LOCALE is NULL, or a zero-length string, then a copy of
** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as







|







258704
258705
258706
258707
258708
258709
258710
258711
258712
258713
258714
258715
258716
258717
258718
/* SQL function taking zero arguments that returns a fixed text string
** identifying the FTS5 source version.
*/
static void fts5SourceIdFunc(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apUnused        /* Function arguments */
){
  const char *zSourceId = "fts5: 2025-09-27 11:54:49 2b34b750b5528b6dda195bc1a3895dc3fe46e70cbf992a78111316e2726c1ade";
  assert( nArg==0 );
  UNUSED_PARAM2(nArg, apUnused);
  sqlite3_result_text(pCtx, zSourceId, -1, SQLITE_TRANSIENT);
}

/*
** Implementation of fts5_locale(LOCALE, TEXT) function.
**
** If parameter LOCALE is NULL, or a zero-length string, then a copy of
** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as
Changes to extsrc/sqlite3.h.
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
**
** See also: [sqlite3_libversion()],
** [sqlite3_libversion_number()], [sqlite3_sourceid()],
** [sqlite_version()] and [sqlite_source_id()].
*/
#define SQLITE_VERSION        "3.51.0"
#define SQLITE_VERSION_NUMBER 3051000
#define SQLITE_SOURCE_ID      "2025-09-26 15:38:52 869c968569b09d05a5b7d587d8fddb3b4611daf7467dc157701e5dc6c960alt1"
#define SQLITE_SCM_BRANCH     "trunk"
#define SQLITE_SCM_TAGS       ""
#define SQLITE_SCM_DATETIME   "2025-09-26T15:38:52.279Z"

/*
** CAPI3REF: Run-Time Library Version Numbers
** KEYWORDS: sqlite3_version sqlite3_sourceid
**
** These interfaces provide the same information as the [SQLITE_VERSION],
** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros







|


|







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
**
** See also: [sqlite3_libversion()],
** [sqlite3_libversion_number()], [sqlite3_sourceid()],
** [sqlite_version()] and [sqlite_source_id()].
*/
#define SQLITE_VERSION        "3.51.0"
#define SQLITE_VERSION_NUMBER 3051000
#define SQLITE_SOURCE_ID      "2025-09-27 11:54:49 2b34b750b5528b6dda195bc1a3895dc3fe46e70cbf992a78111316e2726c1ade"
#define SQLITE_SCM_BRANCH     "trunk"
#define SQLITE_SCM_TAGS       ""
#define SQLITE_SCM_DATETIME   "2025-09-27T11:54:49.147Z"

/*
** CAPI3REF: Run-Time Library Version Numbers
** KEYWORDS: sqlite3_version sqlite3_sourceid
**
** These interfaces provide the same information as the [SQLITE_VERSION],
** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros
Changes to src/regexp.c.
51
52
53
54
55
56
57


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
** A nondeterministic finite automaton (NFA) is used for matching, so the
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.


*/
#include "config.h"
#include "regexp.h"

#ifndef SQLITE_MAX_REGEXP_REPEAT
# define SQLITE_MAX_REGEXP_REPEAT 999
#endif

/* The end-of-input character */
#define RE_EOF            0    /* End of input */
#define RE_START  0xfffffff    /* Start of input - larger than an UTF-8 */

/* The NFA is implemented as sequence of opcodes taken from the following
** set.  Each opcode has a single integer argument.
*/







>
>




<
<
<
<







51
52
53
54
55
56
57
58
59
60
61
62
63




64
65
66
67
68
69
70
** A nondeterministic finite automaton (NFA) is used for matching, so the
** performance is bounded by O(N*M) where N is the size of the regular
** expression and M is the size of the input string.  The matcher never
** exhibits exponential behavior.  Note that the X{p,q} operator expands
** to p copies of X followed by q-p copies of X? and that the size of the
** regular expression in the O(N*M) performance bound is computed after
** this expansion.
**
** To help prevent DoS attacks, the maximum size of the NFA is restricted.
*/
#include "config.h"
#include "regexp.h"





/* The end-of-input character */
#define RE_EOF            0    /* End of input */
#define RE_START  0xfffffff    /* Start of input - larger than an UTF-8 */

/* The NFA is implemented as sequence of opcodes taken from the following
** set.  Each opcode has a single integer argument.
*/
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/* A regular expression, both while it is being compiled and afterwards.
** The program for the matcher is held in two parallel arrays: aOp[i] is
** the i-th operator and aArg[i] is its integer argument.  nState entries
** are in use out of nAlloc allocated slots, and mxAlloc is the
** complexity limit.
*/
struct ReCompiled {
  ReInput sIn;                /* Regular expression text */
  const char *zErr;           /* Error message to return */
  char *aOp;                  /* Operators for the virtual machine */
  int *aArg;                  /* Arguments to each operator */
  unsigned (*xNextChar)(ReInput*);  /* Next character function */
  unsigned char zInit[12];    /* Initial text to match */
  int nInit;                  /* Number of characters in zInit */
  unsigned nState;            /* Number of entries in aOp[] and aArg[] */
  unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */
  unsigned mxAlloc;           /* Complexity limit */
};
#endif

/* Add a state to the given state set if it is not already there */







|







115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/* A regular expression, both while it is being compiled and afterwards.
** The program for the matcher is held in two parallel arrays: aOp[i] is
** the i-th operator and aArg[i] is its integer argument.  nState entries
** are in use out of nAlloc allocated slots, and mxAlloc is the
** complexity limit.
*/
struct ReCompiled {
  ReInput sIn;                /* Regular expression text */
  const char *zErr;           /* Error message to return */
  char *aOp;                  /* Operators for the virtual machine */
  int *aArg;                  /* Arguments to each operator */
  unsigned (*xNextChar)(ReInput*);  /* Next character function */
  unsigned char zInit[12];    /* Initial text to match */
  int nInit;                  /* Number of bytes in zInit */
  unsigned nState;            /* Number of entries in aOp[] and aArg[] */
  unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */
  unsigned mxAlloc;           /* Complexity limit */
};
#endif

/* Add a state to the given state set if it is not already there */
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
      c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f);
      if( c<0x80 ) c = 0xfffd;
    }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
           && (p->z[p->i+1]&0xc0)==0x80 ){
      c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
      p->i += 2;
      if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
    }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
           && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
      c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
                       | (p->z[p->i+2]&0x3f);
      p->i += 3;
      if( c<=0xffff || c>0x10ffff ) c = 0xfffd;
    }else{
      c = 0xfffd;







|







147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
      c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f);
      if( c<0x80 ) c = 0xfffd;
    }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
           && (p->z[p->i+1]&0xc0)==0x80 ){
      c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
      p->i += 2;
      if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
    }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80
           && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
      c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
                       | (p->z[p->i+2]&0x3f);
      p->i += 3;
      if( c<=0xffff || c>0x10ffff ) c = 0xfffd;
    }else{
      c = 0xfffd;
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
        }
        case RE_OP_ACCEPT: {
          rc = 1;
          goto re_match_end;
        }
        case RE_OP_CC_EXC: {
          if( c==0 ) break;
          /* fall-through */
        }
        case RE_OP_CC_INC: {
          int j = 1;
          int n = pRe->aArg[x];
          int hit = 0;
          for(j=1; j>0 && j<n; j++){
            if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){
              if( pRe->aArg[x+j]==c ){
                hit = 1;







|

|







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
        }
        case RE_OP_ACCEPT: {
          rc = 1;
          goto re_match_end;
        }
        case RE_OP_CC_EXC: {
          if( c==0 ) break;
          /* fall-through */ goto re_op_cc_inc;
        }
        case RE_OP_CC_INC: re_op_cc_inc: {
          int j = 1;
          int n = pRe->aArg[x];
          int hit = 0;
          for(j=1; j>0 && j<n; j++){
            if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){
              if( pRe->aArg[x+j]==c ){
                hit = 1;
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
          while( (c=rePeek(p))>='0' && c<='9' ){
            n = n*10 + c-'0';
            if( n*2>p->mxAlloc ) return "REGEXP pattern too big";
            p->sIn.i++;
          }
        }
        if( c!='}' ) return "unmatched '{'";
        if( n>0 && n<m ) return "n less than m in '{m,n}'";
        p->sIn.i++;
        sz = p->nState - iPrev;
        if( m==0 ){
          if( n==0 ) return "both m and n are zero in '{m,n}'";
          re_insert(p, iPrev, RE_OP_FORK, sz+1);
          iPrev++;
          n--;







|







543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
          while( (c=rePeek(p))>='0' && c<='9' ){
            n = n*10 + c-'0';
            if( n*2>p->mxAlloc ) return "REGEXP pattern too big";
            p->sIn.i++;
          }
        }
        if( c!='}' ) return "unmatched '{'";
        if( n<m ) return "n less than m in '{m,n}'";
        p->sIn.i++;
        sz = p->nState - iPrev;
        if( m==0 ){
          if( n==0 ) return "both m and n are zero in '{m,n}'";
          re_insert(p, iPrev, RE_OP_FORK, sz+1);
          iPrev++;
          n--;
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655

/*
** Compile a textual regular expression in zIn[] into a compiled regular
** expression suitable for use by re_match() and return a pointer to the
** compiled regular expression in *ppRe.  Return NULL on success or an
** error message if something goes wrong.
*/
const char *re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int mxRe,               /* Complexity limit */
  int noCase              /* True for caseless comparisons */
){
  ReCompiled *pRe;
  const char *zErr;







|







639
640
641
642
643
644
645
646
647
648
649
650
651
652
653

/*
** Compile a textual regular expression in zIn[] into a compiled regular
** expression suitable for use by re_match() and return a pointer to the
** compiled regular expression in *ppRe.  Return NULL on success or an
** error message if something goes wrong.
*/
static const char *re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int mxRe,               /* Complexity limit */
  int noCase              /* True for caseless comparisons */
){
  ReCompiled *pRe;
  const char *zErr;
714
715
716
717
718
719
720





































































721
722
723
724
725
726
727
      }
    }
    if( j>0 && pRe->zInit[j-1]==0 ) j--;
    pRe->nInit = j;
  }
  return pRe->zErr;
}






































































/*
** The input zIn is a string that we want to match exactly as part of
** a regular expression.  Return a new string (in space obtained from
** fossil_malloc() or the equivalent) that escapes all regexp syntax
** characters in zIn.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
      }
    }
    if( j>0 && pRe->zInit[j-1]==0 ) j--;
    pRe->nInit = j;
  }
  return pRe->zErr;
}

/*
** SQL function regexp(PATTERN,STRING), which backs the built-in REGEXP
** operator.  The pattern is argv[0] and the string to search is argv[1],
** so the SQL expression
**
**       A REGEXP B
**
** is evaluated as regexp(B,A).
**
** The compiled pattern is looked up in, and saved to, the auxiliary data
** slot for argument 0.  A pattern that fails to compile yields a result
** of 0 (no match) rather than an SQL error.
*/
static void re_sql_func(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  ReCompiled *pCompiled;         /* Compiled form of the pattern */
  const unsigned char *zText;    /* Text to be searched */
  int isFresh = 0;               /* True if pCompiled was built by this call */

  (void)argc;  /* Unused */
  pCompiled = sqlite3_get_auxdata(context, 0);
  if( pCompiled==0 ){
    const char *zMsg;            /* Error text from the compiler */
    const char *zRe = (const char*)sqlite3_value_text(argv[0]);
    if( zRe==0 ) return;
    /* Non-NULL user data selects caseless matching */
    zMsg = fossil_re_compile(&pCompiled, zRe, sqlite3_user_data(context)!=0);
    if( zMsg ){
      /* The SQLite function this was copied from raises an error for a
      ** malformed REGEXP.  Fossil prefers a silent non-match instead. */
      re_free(pCompiled);
      sqlite3_result_int(context, 0);
      return;
    }
    if( pCompiled==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    isFresh = 1;
  }
  zText = (const unsigned char*)sqlite3_value_text(argv[1]);
  if( zText!=0 ){
    sqlite3_result_int(context, re_match(pCompiled, zText, -1));
  }
  /* Hand the new object to SQLite only after the last use of pCompiled */
  if( isFresh ){
    sqlite3_set_auxdata(context, 0, pCompiled, (void(*)(void*))re_free);
  }
}

/*
** Register the regexp() SQL function, and its case-insensitive sibling
** regexpi(), with database connection db.  Returns the result code of
** the first registration that fails, or of the last one attempted.
*/
int re_add_sql_func(sqlite3 *db){
  const int mFlags = SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC;
  int rc = sqlite3_create_function(db, "regexp", 2, mFlags,
                                   0, re_sql_func, 0, 0);
  if( rc!=SQLITE_OK ) return rc;
  /* The regexpi(PATTERN,STRING) function is a case-insensitive version
  ** of regexp(PATTERN,STRING).  The non-NULL user-data pointer is what
  ** tells re_sql_func() to ignore case. */
  return sqlite3_create_function(db, "regexpi", 2, mFlags,
                                 (void*)db, re_sql_func, 0, 0);
}

/*
** The input zIn is a string that we want to match exactly as part of
** a regular expression.  Return a new string (in space obtained from
** fossil_malloc() or the equivalent) that escapes all regexp syntax
** characters in zIn.
*/
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783

784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
** SETTING:  regexp-limit                  width=8 default=1000
**
** Limit the size of the bytecode used to implement a regular expression
** to this many steps.  It is important to limit this to avoid possible
** DoS attacks.
*/

/*
** Return the limit on the length of the REGEXP NFA: the value of the
** "regexp-limit" setting (default 1000) when g.db is set, or 1000
** otherwise.
*/
int re_maxlen(void){
  if( g.db ){
    return db_get_int("regexp-limit", 1000);
  }
  return 1000;
}

/*
** Compile the regular expression zIn, using the value returned by
** re_maxlen() as the complexity limit.  The return value is whatever
** re_compile() returns.
*/
const char *fossil_re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int noCase              /* True for caseless comparisons */
){
  int mxRe = re_maxlen();
  return re_compile(ppRe, zIn, mxRe, noCase);
}

/*
** SQL function regexp(PATTERN,STRING), which backs the built-in REGEXP
** operator.  The pattern is argv[0] and the string to search is argv[1],
** so the SQL expression
**
**       A REGEXP B
**
** is evaluated as regexp(B,A).
**
** The compiled pattern is looked up in, and saved to, the auxiliary data
** slot for argument 0.  A pattern that fails to compile yields a result
** of 0 (no match) rather than an SQL error.
*/
static void re_sql_func(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  ReCompiled *pCompiled;         /* Compiled form of the pattern */
  const unsigned char *zText;    /* Text to be searched */
  int isFresh = 0;               /* True if pCompiled was built by this call */

  (void)argc;  /* Unused */
  pCompiled = sqlite3_get_auxdata(context, 0);
  if( pCompiled==0 ){
    const char *zMsg;            /* Error text from the compiler */
    const char *zRe = (const char*)sqlite3_value_text(argv[0]);
    if( zRe==0 ) return;
    /* Non-NULL user data selects caseless matching */
    zMsg = fossil_re_compile(&pCompiled, zRe, sqlite3_user_data(context)!=0);
    if( zMsg ){
      /* A malformed pattern is reported as "no match", not as an error */
      re_free(pCompiled);
      sqlite3_result_int(context, 0);
      return;
    }
    if( pCompiled==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    isFresh = 1;
  }
  zText = (const unsigned char*)sqlite3_value_text(argv[1]);
  if( zText!=0 ){
    sqlite3_result_int(context, re_match(pCompiled, zText, -1));
  }
  /* Hand the new object to SQLite only after the last use of pCompiled */
  if( isFresh ){
    sqlite3_set_auxdata(context, 0, pCompiled, (void(*)(void*))re_free);
  }
}

/*
** Register the regexp() SQL function, and its case-insensitive sibling
** regexpi(), with database connection db.  Returns the result code of
** the first registration that fails, or of the last one attempted.
*/
int re_add_sql_func(sqlite3 *db){
  const int mFlags = SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC;
  int rc = sqlite3_create_function(db, "regexp", 2, mFlags,
                                   0, re_sql_func, 0, 0);
  if( rc!=SQLITE_OK ) return rc;
  /* The regexpi(PATTERN,STRING) function is a case-insensitive version
  ** of regexp(PATTERN,STRING).  The non-NULL user-data pointer is what
  ** tells re_sql_func() to ignore case. */
  return sqlite3_create_function(db, "regexpi", 2, mFlags,
                                 (void*)db, re_sql_func, 0, 0);
}

/*
** Run a "grep" over a single file read from disk.
*/
static void grep_file(ReCompiled *pRe, const char *zFile, FILE *in){
  int ln = 0;








<
<
<
<
<
<
<







>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







829
830
831
832
833
834
835
836







837
838
839
840
841
842
843
844
845


































































846
847
848
849
850
851
852
** SETTING:  regexp-limit                  width=8 default=1000
**
** Limit the size of the bytecode used to implement a regular expression
** to this many steps.  It is important to limit this to avoid possible
** DoS attacks.
*/

/*
** Compile the regular expression zIn.  The complexity limit handed to
** re_compile() is the "regexp-limit" setting (default 1000) when g.db
** is set, or 1000 otherwise.  The return value is whatever re_compile()
** returns.
*/
const char *fossil_re_compile(
  ReCompiled **ppRe,      /* OUT: write compiled NFA here */
  const char *zIn,        /* Input regular expression */
  int noCase              /* True for caseless comparisons */
){
  int mxLen = 1000;
  if( g.db ){
    mxLen = db_get_int("regexp-limit",1000);
  }
  return re_compile(ppRe, zIn, mxLen, noCase);
}

/*
** Run a "grep" over a single file read from disk.
*/
static void grep_file(ReCompiled *pRe, const char *zFile, FILE *in){
  int ln = 0;