Fossil: Check-in [82888a0d35]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Add a new setting "regexp-limit" that determines the maximum size of a REGEXP virtual machine. Default value 1000.
Downloads:	Tarball | ZIP archive
Timelines:	family | ancestors | descendants | both | trunk
Files:	files | file ages | folders
SHA3-256:	82888a0d35a80944460eff38b34a25d942e141b1778af1e56451d7029361660c
User & Date:	drh 2025-09-26 20:07:06.903

Context

2025-09-27
11:10		Update the built-in SQLite to the latest trunk version so that it will compile without warnings on Windows. ... (check-in: 702a56d116 user: drh tags: trunk)
00:47		Use the datetime of the start of the branch as the input for the color hash. See suggestion made by Stephan Beal in [forum:/forumpost/a9a92d73c4a172f9|forum post a9a92d73c4a172f9]. ... (check-in: 1851b26d2b user: andybradford tags: datetime-color-hash)
2025-09-26
20:07		Add a new setting "regexp-limit" that determines the maximum size of a REGEXP virtual machine. Default value 1000. ... (check-in: 82888a0d35 user: drh tags: trunk)
14:23		Updates to the changelog. ... (check-in: 28483bfc5a user: danield tags: trunk)

Changes

Changes to src/browse.c.

Changes to src/diff.c.

Changes to src/dispatch.c.

Changes to src/info.c.

Changes to src/json.c.

Changes to src/match.c.

Changes to src/regexp.c.

Changes to src/robot.c.

Changes to src/th_main.c.

︙			︙
3384 3385 3386 3387 3388 3389 3390 ~~3391~~ 3392 3393 3394 3395 3396 3397 3398	return; } find_option("i",0,0); find_option("v",0,0); diff_options(&DCfg, 0, 0); zRe = find_option("regexp","e",1); if( zRe ){ ~~const char *zErr = re_compile(&DCfg.pRe, zRe, 0);~~ if( zErr ) fossil_fatal("regex error: %s", zErr); } verify_all_options(); if( g.argc!=4 ) usage("FILE1 FILE2"); blob_zero(&out); diff_begin(&DCfg); diff_print_filenames(g.argv[2], g.argv[3], &DCfg, &out);	\|	3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398	return; } find_option("i",0,0); find_option("v",0,0); diff_options(&DCfg, 0, 0); zRe = find_option("regexp","e",1); if( zRe ){ const char *zErr = fossil_re_compile(&DCfg.pRe, zRe, 0); if( zErr ) fossil_fatal("regex error: %s", zErr); } verify_all_options(); if( g.argc!=4 ) usage("FILE1 FILE2"); blob_zero(&out); diff_begin(&DCfg); diff_print_filenames(g.argv[2], g.argv[3], &DCfg, &out);
︙			︙
3427 3428 3429 3430 3431 3432 3433 ~~3434~~ 3435 3436 3437 3438 3439 3440 3441	return; } find_option("i",0,0); find_option("v",0,0); diff_options(&DCfg, 0, 0); zRe = find_option("regexp","e",1); if( zRe ){ ~~const char *zErr = re_compile(&DCfg.pRe, zRe, 0);~~ if( zErr ) fossil_fatal("regex error: %s", zErr); } db_find_and_open_repository(0, 0); verify_all_options(); if( g.argc!=4 ) usage("HASH1 HASH2"); blob_zero(&out); diff_begin(&DCfg);	\|	3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441	return; } find_option("i",0,0); find_option("v",0,0); diff_options(&DCfg, 0, 0); zRe = find_option("regexp","e",1); if( zRe ){ const char *zErr = fossil_re_compile(&DCfg.pRe, zRe, 0); if( zErr ) fossil_fatal("regex error: %s", zErr); } db_find_and_open_repository(0, 0); verify_all_options(); if( g.argc!=4 ) usage("HASH1 HASH2"); blob_zero(&out); diff_begin(&DCfg);
︙			︙

︙			︙
1153 1154 1155 1156 1157 1158 1159 ~~1160~~ 1161 1162 1163 1164 1165 1166 1167	if( bAbbrevSubcmd ){ zPattern = mprintf(" ([a-z]+ ?\\\| ?)%s\\b", zQSub); }else{ zPattern = mprintf("> ?fossil [-a-z]+ .\\b%s\\b", zQSub); } fossil_free(zQTop); fossil_free(zQSub); ~~re_compile(&pRe, zPattern, 0);~~ fossil_free(zPattern); blob_init(&in, z, -1); while( blob_line(&in, &line) ){ if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_size(&line)) ){ int atStart = 1; blob_appendb(pOut, &line); n++;	\|	1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167	if( bAbbrevSubcmd ){ zPattern = mprintf(" ([a-z]+ ?\\\| ?)%s\\b", zQSub); }else{ zPattern = mprintf("> ?fossil [-a-z]+ .\\b%s\\b", zQSub); } fossil_free(zQTop); fossil_free(zQSub); fossil_re_compile(&pRe, zPattern, 0); fossil_free(zPattern); blob_init(&in, z, -1); while( blob_line(&in, &line) ){ if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_size(&line)) ){ int atStart = 1; blob_appendb(pOut, &line); n++;
︙			︙
1250 1251 1252 1253 1254 1255 1256 ~~1257~~ 1258 ~~1259~~ 1260 1261 1262 1263 1264 1265 1266	int bAbbrevSubcmd /* z[] uses abbreviated subcommands / ){ ReCompiled pRe = 0; Blob in, line; int n = 0; if( bAbbrevSubcmd ){ ~~re_compile(&pRe, "^(Usage: \| [a-z][-a-z\|]+ .)", 0);~~ }else{ ~~re_compile(&pRe, "^(Usage: \| [Oo]r: +%fossi \|> ?fossil )", 0);~~ } blob_init(&in, z, -1); while( blob_line(&in, &line) ){ if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_strlen(&line)) ){ simplify_usage_line(&line, pOut, bAbbrevSubcmd, zTopic); n++; }	\| \|	1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266	int bAbbrevSubcmd /* z[] uses abbreviated subcommands / ){ ReCompiled pRe = 0; Blob in, line; int n = 0; if( bAbbrevSubcmd ){ fossil_re_compile(&pRe, "^(Usage: \| [a-z][-a-z\|]+ .)", 0); }else{ fossil_re_compile(&pRe, "^(Usage: \| [Oo]r: +%fossi \|> ?fossil )", 0); } blob_init(&in, z, -1); while( blob_line(&in, &line) ){ if( re_match(pRe, (unsigned char*)blob_buffer(&line), blob_strlen(&line)) ){ simplify_usage_line(&line, pOut, bAbbrevSubcmd, zTopic); n++; }
︙			︙
1283 1284 1285 1286 1287 1288 1289 ~~1290~~ 1291 1292 1293 1294 1295 1296 1297	ReCompiled pRe = 0; Blob txt, line, subsection; int n = 0; int bSubsectionSeen = 0; blob_init(&txt, z, -1); blob_init(&subsection, 0, 0); ~~re_compile(&pRe, "^ +-. ", 0);~~ while( blob_line(&txt, &line) ){ int len = blob_size(&line); unsigned char zLine = (unsigned char )blob_buffer(&line); if( re_match(pRe, zLine, len) ){ if( blob_size(&subsection) ){ simplify_usage_line(&subsection, pOut, bAbbrevSubcmd, zCmd); blob_reset(&subsection);	\|	1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297	ReCompiled pRe = 0; Blob txt, line, subsection; int n = 0; int bSubsectionSeen = 0; blob_init(&txt, z, -1); blob_init(&subsection, 0, 0); fossil_re_compile(&pRe, "^ +-. ", 0); while( blob_line(&txt, &line) ){ int len = blob_size(&line); unsigned char zLine = (unsigned char )blob_buffer(&line); if( re_match(pRe, zLine, len) ){ if( blob_size(&subsection) ){ simplify_usage_line(&subsection, pOut, bAbbrevSubcmd, zCmd); blob_reset(&subsection);
︙			︙

︙			︙
934 935 936 937 938 939 940 ~~941~~ 942 943 944 945 946 947 948	if( rid==0 ){ style_header("Check-in Information Error"); @ No such object: %h(zName) style_finish_page(); return; } zRe = P("regex"); ~~if( zRe ) re_compile(&pRe, zRe, 0);~~ zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid); zParent = db_text(0, "SELECT uuid FROM plink, blob" " WHERE plink.cid=%d AND blob.rid=plink.pid AND plink.isprim", rid ); isLeaf = !db_exists("SELECT 1 FROM plink WHERE pid=%d", rid);	\|	934 935 936 937 938 939 940 941 942 943 944 945 946 947 948	if( rid==0 ){ style_header("Check-in Information Error"); @ No such object: %h(zName) style_finish_page(); return; } zRe = P("regex"); if( zRe ) fossil_re_compile(&pRe, zRe, 0); zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid); zParent = db_text(0, "SELECT uuid FROM plink, blob" " WHERE plink.cid=%d AND blob.rid=plink.pid AND plink.isprim", rid ); isLeaf = !db_exists("SELECT 1 FROM plink WHERE pid=%d", rid);
︙			︙
1446 1447 1448 1449 1450 1451 1452 ~~1453~~ 1454 1455 1456 1457 1458 1459 1460	if( robot_restrict("diff") ) return; login_anonymous_available(); fossil_nice_default(); blob_init(&qp, 0, 0); blob_init(&qpGlob, 0, 0); diffType = preferred_diff_type(); zRe = P("regex"); ~~if( zRe ) re_compile(&pRe, zRe, 0);~~ zBranch = P("branch"); if( zBranch && zBranch[0]==0 ) zBranch = 0; if( zBranch ){ blob_appendf(&qp, "branch=%T", zBranch); zMergeOrigin = mprintf("merge-in:%s", zBranch); cgi_replace_parameter("from", zMergeOrigin); cgi_replace_parameter("to", zBranch);	\|	1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460	if( robot_restrict("diff") ) return; login_anonymous_available(); fossil_nice_default(); blob_init(&qp, 0, 0); blob_init(&qpGlob, 0, 0); diffType = preferred_diff_type(); zRe = P("regex"); if( zRe ) fossil_re_compile(&pRe, zRe, 0); zBranch = P("branch"); if( zBranch && zBranch[0]==0 ) zBranch = 0; if( zBranch ){ blob_appendf(&qp, "branch=%T", zBranch); zMergeOrigin = mprintf("merge-in:%s", zBranch); cgi_replace_parameter("from", zMergeOrigin); cgi_replace_parameter("to", zBranch);
︙			︙
2049 2050 2051 2052 2053 2054 2055 ~~2056~~ 2057 2058 2059 2060 2061 2062 2063	"%R/annotate?origin=%s&checkin=%s&filename=%T", zOrig, zCkin, zFN); } db_finalize(&q); } zRe = P("regex"); cgi_check_for_malice(); ~~if( zRe ) re_compile(&pRe, zRe, 0);~~ if( verbose ) objdescFlags \|= OBJDESC_DETAIL; if( isPatch ){ Blob c1, c2, *pOut; DiffConfig DCfg; pOut = cgi_output_blob(); cgi_set_content_type("text/plain"); DCfg.diffFlags = DIFF_VERBOSE;	\|	2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063	"%R/annotate?origin=%s&checkin=%s&filename=%T", zOrig, zCkin, zFN); } db_finalize(&q); } zRe = P("regex"); cgi_check_for_malice(); if( zRe ) fossil_re_compile(&pRe, zRe, 0); if( verbose ) objdescFlags \|= OBJDESC_DETAIL; if( isPatch ){ Blob c1, c2, *pOut; DiffConfig DCfg; pOut = cgi_output_blob(); cgi_set_content_type("text/plain"); DCfg.diffFlags = DIFF_VERBOSE;
︙			︙

︙			︙
140 141 142 143 144 145 146 ~~147~~ 148 149 150 151 152 153 154	zOne = fossil_strndup(zPat, i); zPat += i; if( zPat[0] ) zPat++; /* Check for regular expression syntax errors. / if( style==MS_REGEXP ){ ReCompiled regexp; ~~const char zFail = re_compile(&regexp, zOne, 0);~~ if( zFail ){ re_free(regexp); continue; } p->nPattern++; p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)p->nPattern); p->aRe[p->nPattern-1] = regexp;	\|	140 141 142 143 144 145 146 147 148 149 150 151 152 153 154	zOne = fossil_strndup(zPat, i); zPat += i; if( zPat[0] ) zPat++; /* Check for regular expression syntax errors. / if( style==MS_REGEXP ){ ReCompiled regexp; const char zFail = fossil_re_compile(&regexp, zOne, 0); if( zFail ){ re_free(regexp); continue; } p->nPattern++; p->aRe = fossil_realloc(p->aRe, sizeof(p->aRe)p->nPattern); p->aRe[p->nPattern-1] = regexp;
︙			︙
373 374 375 376 377 378 379 ~~380~~ 381 382 383 384 385 386 387	} } /* Check for regular expression syntax errors. / if( matchStyle==MS_REGEXP ){ ReCompiled regexp; char zTagDup = fossil_strndup(zTag, i); ~~zFail = re_compile(&regexp, zTagDup, 0);~~ re_free(regexp); fossil_free(zTagDup); } / Process success and error results. / if( !zFail ){ / Incorporate the match word into the output expression. %q is used to	\|	373 374 375 376 377 378 379 380 381 382 383 384 385 386 387	} } /* Check for regular expression syntax errors. / if( matchStyle==MS_REGEXP ){ ReCompiled regexp; char zTagDup = fossil_strndup(zTag, i); zFail = fossil_re_compile(&regexp, zTagDup, 0); re_free(regexp); fossil_free(zTagDup); } / Process success and error results. / if( !zFail ){ / Incorporate the match word into the output expression. %q is used to
︙			︙

︙			︙
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36	The following regular expression syntax is supported: ** X* zero or more occurrences of X X+ one or more occurrences of X X? zero or one occurrences of X X{p,q} between p and q occurrences of X~~, 0 <= p,q <= 999~~ (X) match X X\|Y X or Y ^X X occurring at the beginning of the string X$ X occurring at the end of the string . Match any single character ** \c Character c where c is one of \{}()[]\|+?. * \c C-language escapes for c in afnrtv. ex: \t or \n	\|	22 23 24 25 26 27 28 29 30 31 32 33 34 35 36	The following regular expression syntax is supported: ** X* zero or more occurrences of X X+ one or more occurrences of X X? zero or one occurrences of X X{p,q} between p and q occurrences of X (X) match X X\|Y X or Y ^X X occurring at the beginning of the string X$ X occurring at the end of the string . Match any single character ** \c Character c where c is one of \{}()[]\|+?. * \c C-language escapes for c in afnrtv. ex: \t or \n
︙			︙
51 52 53 54 55 56 57 ~~58 59 60~~ 61 62 63 64 65 66 67	A nondeterministic finite automaton (NFA) is used for matching, so the performance is bounded by O(NM) where N is the size of the regular * expression and M is the size of the input string. The matcher never exhibits exponential behavior. Note that the X{p,q} operator expands to p copies of X following by q-p copies of X? and that the size of the ** regular expression in the O(NM) performance bound is computed after * this expansion. To help prevent DoS attacks, the values of p and q in the "{p,q}" syntax ** are limited to SQLITE_MAX_REGEXP_REPEAT, default 999. */ #include "config.h" #include "regexp.h" #ifndef SQLITE_MAX_REGEXP_REPEAT # define SQLITE_MAX_REGEXP_REPEAT 999 #endif	< < <	51 52 53 54 55 56 57 58 59 60 61 62 63 64	A nondeterministic finite automaton (NFA) is used for matching, so the performance is bounded by O(NM) where N is the size of the regular * expression and M is the size of the input string. The matcher never exhibits exponential behavior. Note that the X{p,q} operator expands to p copies of X following by q-p copies of X? and that the size of the ** regular expression in the O(NM) performance bound is computed after * this expansion. */ #include "config.h" #include "regexp.h" #ifndef SQLITE_MAX_REGEXP_REPEAT # define SQLITE_MAX_REGEXP_REPEAT 999 #endif
︙			︙
123 124 125 126 127 128 129 130 131 132 133 134 135 136	char aOp; / Operators for the virtual machine / int aArg; /* Arguments to each operator / unsigned (xNextChar)(ReInput); / Next character function / unsigned char zInit[12]; / Initial text to match / int nInit; / Number of characters in zInit / unsigned nState; / Number of entries in aOp[] and aArg[] / unsigned nAlloc; / Slots allocated for aOp[] and aArg[] / }; #endif / Add a state to the given state set if it is not already there / static void re_add_state(ReStateSet pSet, int newState){ unsigned i; for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;	>	120 121 122 123 124 125 126 127 128 129 130 131 132 133 134	char aOp; / Operators for the virtual machine / int aArg; /* Arguments to each operator / unsigned (xNextChar)(ReInput); / Next character function / unsigned char zInit[12]; / Initial text to match / int nInit; / Number of characters in zInit / unsigned nState; / Number of entries in aOp[] and aArg[] / unsigned nAlloc; / Slots allocated for aOp[] and aArg[] / unsigned mxAlloc; / Complexity limit / }; #endif / Add a state to the given state set if it is not already there / static void re_add_state(ReStateSet pSet, int newState){ unsigned i; for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;
︙			︙
339 340 341 342 343 344 345 346 ~~347~~ 348 349 ~~350~~ 351 352 353 354 355 356 357	} /* Resize the opcode and argument arrays for an RE under construction. / static int re_resize(ReCompiled p, int N){ char aOp; int aArg; aOp = fossil_realloc(p->aOp, Nsizeof(p->aOp[0])); ~~if( aOp==0 ) return 1;~~ p->aOp = aOp; aArg = fossil_realloc(p->aArg, Nsizeof(p->aArg[0])); ~~if( aArg==0 ) return 1;~~ p->aArg = aArg; p->nAlloc = N; return 0; } /* Insert a new opcode and argument into an RE under construction. The ** insertion point is just prior to existing opcode iBefore.	> \| \|	337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356	} /* Resize the opcode and argument arrays for an RE under construction. / static int re_resize(ReCompiled p, int N){ char aOp; int aArg; if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; } aOp = fossil_realloc(p->aOp, Nsizeof(p->aOp[0])); if( aOp==0 ){ p->zErr = "out of memory"; return 1; } p->aOp = aOp; aArg = fossil_realloc(p->aArg, Nsizeof(p->aArg[0])); if( aArg==0 ){ p->zErr = "out of memory"; return 1; } p->aArg = aArg; p->nAlloc = N; return 0; } /* Insert a new opcode and argument into an RE under construction. The ** insertion point is just prior to existing opcode iBefore.
︙			︙
532 533 534 535 536 537 538 ~~539~~ 540 541 542 543 544 545 546 547 ~~548~~ 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 ~~569~~ 570 571 572 573 574 575 576	} case '{': { unsigned int m = 0, n = 0; unsigned int sz, j; if( iPrev<0 ) return "'{m,n}' without operand"; while( (c=rePeek(p))>='0' && c<='9' ){ m = m10 + c - '0'; ~~if( m>~~SQLITE_M~~A~~X_REGEXP_REPEAT~~ ) return "inte~~ger~~ too ~~large~~";~~ p->sIn.i++; } n = m; if( c==',' ){ p->sIn.i++; n = 0; while( (c=rePeek(p))>='0' && c<='9' ){ n = n10 + c-'0'; ~~if( n>~~SQLITE_M~~A~~X_REGEXP_REPEAT~~ ) return "inte~~ger~~ too ~~large~~";~~ p->sIn.i++; } } if( c!='}' ) return "unmatched '{'"; if( n>0 && n<m ) return "n less than m in '{m,n}'"; p->sIn.i++; sz = p->nState - iPrev; if( m==0 ){ if( n==0 ) return "both m and n are zero in '{m,n}'"; re_insert(p, iPrev, RE_OP_FORK, sz+1); iPrev++; n--; }else{ for(j=1; j<m; j++) re_copy(p, iPrev, sz); } for(j=m; j<n; j++){ re_append(p, RE_OP_FORK, sz+1); re_copy(p, iPrev, sz); } if( n==0 && m>0 ){ ~~re_append(p, RE_OP_FORK, -sz);~~ } break; } case '[': { unsigned int iFirst = p->nState; if( rePeek(p)=='^' ){ re_append(p, RE_OP_CC_EXC, 0);	\| \| \|	531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575	} case '{': { unsigned int m = 0, n = 0; unsigned int sz, j; if( iPrev<0 ) return "'{m,n}' without operand"; while( (c=rePeek(p))>='0' && c<='9' ){ m = m10 + c - '0'; if( m2>p->mxAlloc ) return "REGEXP pattern too big"; p->sIn.i++; } n = m; if( c==',' ){ p->sIn.i++; n = 0; while( (c=rePeek(p))>='0' && c<='9' ){ n = n10 + c-'0'; if( n2>p->mxAlloc ) return "REGEXP pattern too big"; p->sIn.i++; } } if( c!='}' ) return "unmatched '{'"; if( n>0 && n<m ) return "n less than m in '{m,n}'"; p->sIn.i++; sz = p->nState - iPrev; if( m==0 ){ if( n==0 ) return "both m and n are zero in '{m,n}'"; re_insert(p, iPrev, RE_OP_FORK, sz+1); 
iPrev++; n--; }else{ for(j=1; j<m; j++) re_copy(p, iPrev, sz); } for(j=m; j<n; j++){ re_append(p, RE_OP_FORK, sz+1); re_copy(p, iPrev, sz); } if( n==0 && m>0 ){ re_append(p, RE_OP_FORK, -(int)sz); } break; } case '[': { unsigned int iFirst = p->nState; if( rePeek(p)=='^' ){ re_append(p, RE_OP_CC_EXC, 0);
︙			︙
642 643 644 645 646 647 648 ~~649~~ 650 651 652 653 654 655 656 657 658 659 660 661 662 ~~663~~ 664 665 666 667 668 669 670	/* Compile a textual regular expression in zIn[] into a compiled regular expression suitable for us by re_match() and return a pointer to the ** compiled regular expression in ppRe. Return NULL on success or an * error message if something goes wrong. / ~~const char re_compile(~~ReCompiled *ppRe, const char zIn, int noCase){~~~~ ReCompiled pRe; const char zErr; int i, j; ppRe = 0; pRe = fossil_malloc( sizeof(pRe) ); if( pRe==0 ){ return "out of memory"; } memset(pRe, 0, sizeof(pRe)); pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; if( re_resize(pRe, 30) ){ re_free(pRe); ~~return ~~"out of memory"~~;~~ } if( zIn[0]=='^' ){ zIn++; }else{ re_append(pRe, RE_OP_ANYSTAR, 0); } pRe->sIn.z = (unsigned char)zIn;	\| > > > > > > > \|	641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676	/* Compile a textual regular expression in zIn[] into a compiled regular expression suitable for us by re_match() and return a pointer to the ** compiled regular expression in ppRe. Return NULL on success or an * error message if something goes wrong. / const char re_compile( ReCompiled *ppRe, / OUT: write compiled NFA here / const char zIn, /* Input regular expression / int mxRe, / Complexity limit / int noCase / True for caseless comparisons / ){ ReCompiled pRe; const char zErr; int i, j; ppRe = 0; pRe = fossil_malloc( sizeof(pRe) ); if( pRe==0 ){ return "out of memory"; } memset(pRe, 0, sizeof(pRe)); pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; pRe->mxAlloc = mxRe; if( re_resize(pRe, 30) ){ zErr = pRe->zErr; re_free(pRe); return zErr; } if( zIn[0]=='^' ){ zIn++; }else{ re_append(pRe, RE_OP_ANYSTAR, 0); } pRe->sIn.z = (unsigned char*)zIn;
︙			︙
747 748 749 750 751 752 753 754 755 756 757 758 759 760	} } zIn++; } blob_materialize(&out); return out.aData; } /* Implementation of the regexp() SQL function. This function implements the build-in REGEXP operator. The first argument to the function is the pattern and the second argument is the string. So, the SQL statements: ** A REGEXP B	> > > > > > > > > > > > > > > > > > > > > > > > > >	753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792	} } zIn++; } blob_materialize(&out); return out.aData; } /* SETTING: regexp-limit width=8 default=1000 Limit the size of the bytecode used to implement a regular expression to this many steps. It is important to limit this to avoid possible ** DoS attacks. / / ** Compute a reasonable limit on the length of the REGEXP NFA. / int re_maxlen(void){ return g.db ? db_get_int("regexp-limit", 1000) : 1000; } / ** Compile an RE using re_maxlen(). / const char fossil_re_compile( ReCompiled *ppRe, / OUT: write compiled NFA here / const char zIn, /* Input regular expression / int noCase / True for caseless comparisons / ){ return re_compile(ppRe, zIn, re_maxlen(), noCase); } / Implementation of the regexp() SQL function. This function implements the build-in REGEXP operator. The first argument to the function is the pattern and the second argument is the string. So, the SQL statements: ** A REGEXP B
︙			︙
773 774 775 776 777 778 779 ~~780~~ 781 782 783 784 785 786 787	int setAux = 0; /* True to invoke sqlite3_set_auxdata() / (void)argc; / Unused / pRe = sqlite3_get_auxdata(context, 0); if( pRe==0 ){ zPattern = (const char)sqlite3_value_text(argv[0]); if( zPattern==0 ) return; ~~zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);~~ if( zErr ){ re_free(pRe); sqlite3_result_int(context, 0); /* sqlite3_result_error(context, zErr, -1); */ return; } if( pRe==0 ){	\|	805 806 807 808 809 810 811 812 813 814 815 816 817 818 819	int setAux = 0; /* True to invoke sqlite3_set_auxdata() / (void)argc; / Unused / pRe = sqlite3_get_auxdata(context, 0); if( pRe==0 ){ zPattern = (const char)sqlite3_value_text(argv[0]); if( zPattern==0 ) return; zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); if( zErr ){ re_free(pRe); sqlite3_result_int(context, 0); /* sqlite3_result_error(context, zErr, -1); */ return; } if( pRe==0 ){
︙			︙
801 802 803 804 805 806 807 ~~808~~ 809 810 811 812 ~~813~~ 814 815 816 817 818 819 820	/* Invoke this routine to register the regexp() function with the SQLite database connection. / int re_add_sql_func(sqlite3 db){ int rc; ~~rc = sqlite3_create_function(db, "regexp", 2, ~~SQLITE_UTF8\|SQLITE_INNOCUOUS,~~~~ 0, re_sql_func, 0, 0); if( rc==SQLITE_OK ){ /* The regexpi(PATTERN,STRING) function is a case-insensitive version ** of regexp(PATTERN,STRING). / ~~rc = sqlite3_create_function(db, "regexpi", 2, ~~SQLITE_UTF8\|SQLITE_INNOCUOUS,~~~~ (void)db, re_sql_func, 0, 0); } return rc; } /* ** Run a "grep" over a single file read from disk.	\| > \| >	833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854	/* Invoke this routine to register the regexp() function with the SQLite database connection. / int re_add_sql_func(sqlite3 db){ int rc; rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8\|SQLITE_INNOCUOUS\|SQLITE_DETERMINISTIC, 0, re_sql_func, 0, 0); if( rc==SQLITE_OK ){ /* The regexpi(PATTERN,STRING) function is a case-insensitive version ** of regexp(PATTERN,STRING). / rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8\|SQLITE_INNOCUOUS\|SQLITE_DETERMINISTIC, (void)db, re_sql_func, 0, 0); } return rc; } /* ** Run a "grep" over a single file read from disk.
︙			︙
889 890 891 892 893 894 895 ~~896~~ 897 898 899 900 901 902 ~~903~~ 904 905 906 907 908 909 910	int ignoreCase = find_option("ignore-case","i",0)!=0; int bRobot = find_option("robot-exception",0,0)!=0; if( bRobot ){ const char *zRe; db_find_and_open_repository(0,0); verify_all_options(); zRe = db_get("robot-exception","^$"); ~~zErr = re_compile(&pRe, zRe, ignoreCase);~~ iFileList = 2; }else{ verify_all_options(); if( g.argc<3 ){ usage("REGEXP [FILE...]"); } ~~zErr = re_compile(&pRe, g.argv[2], ignoreCase);~~ } if( zErr ) fossil_fatal("%s", zErr); if( g.argc==iFileList ){ grep_file(pRe, "-", stdin); }else{ int i; for(i=iFileList; i<g.argc; i++){	\| \|	923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944	int ignoreCase = find_option("ignore-case","i",0)!=0; int bRobot = find_option("robot-exception",0,0)!=0; if( bRobot ){ const char *zRe; db_find_and_open_repository(0,0); verify_all_options(); zRe = db_get("robot-exception","^$"); zErr = fossil_re_compile(&pRe, zRe, ignoreCase); iFileList = 2; }else{ verify_all_options(); if( g.argc<3 ){ usage("REGEXP [FILE...]"); } zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); } if( zErr ) fossil_fatal("%s", zErr); if( g.argc==iFileList ){ grep_file(pRe, "-", stdin); }else{ int i; for(i=iFileList; i<g.argc; i++){
︙			︙
978 979 980 981 982 983 984 ~~985~~ 986 987 988 989 990 991 992	flags \|= GREP_QUIET\|GREP_EXISTS; } db_find_and_open_repository(0, 0); verify_all_options(); if( g.argc<4 ){ usage("REGEXP FILENAME ..."); } ~~zErr = re_compile(&pRe, g.argv[2], ignoreCase);~~ if( zErr ) fossil_fatal("%s", zErr); add_content_sql_commands(g.db); db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);"); for(ii=3; ii<g.argc; ii++){ const char *zTarget = g.argv[ii]; if( file_tree_name(zTarget, &fullName, 0, 1) ){	\|	1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026	flags \|= GREP_QUIET\|GREP_EXISTS; } db_find_and_open_repository(0, 0); verify_all_options(); if( g.argc<4 ){ usage("REGEXP FILENAME ..."); } zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); if( zErr ) fossil_fatal("%s", zErr); add_content_sql_commands(g.db); db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);"); for(ii=3; ii<g.argc; ii++){ const char *zTarget = g.argv[ii]; if( file_tree_name(zTarget, &fullName, 0, 1) ){
︙			︙

︙			︙
2143 2144 2145 2146 2147 2148 2149 ~~2150~~ 2151 2152 2153 2154 2155 2156 2157	if( fossil_strcmp(argv[nArg], "-nocase")==0 ){ noCase = 1; nArg++; } if( fossil_strcmp(argv[nArg], "--")==0 ) nArg++; if( nArg+2!=argc ){ return Th_WrongNumArgs(interp, REGEXP_WRONGNUMARGS); } ~~zErr = re_compile(&pRe, argv[nArg], noCase);~~ if( !zErr ){ Th_SetResultInt(interp, re_match(pRe, (const unsigned char *)argv[nArg+1], TH1_LEN(argl[nArg+1]))); rc = TH_OK; }else{ Th_SetResult(interp, zErr, -1); rc = TH_ERROR;	\|	2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157	if( fossil_strcmp(argv[nArg], "-nocase")==0 ){ noCase = 1; nArg++; } if( fossil_strcmp(argv[nArg], "--")==0 ) nArg++; if( nArg+2!=argc ){ return Th_WrongNumArgs(interp, REGEXP_WRONGNUMARGS); } zErr = fossil_re_compile(&pRe, argv[nArg], noCase); if( !zErr ){ Th_SetResultInt(interp, re_match(pRe, (const unsigned char *)argv[nArg+1], TH1_LEN(argl[nArg+1]))); rc = TH_OK; }else{ Th_SetResult(interp, zErr, -1); rc = TH_ERROR;
︙			︙
2200 2201 2202 2203 2204 2205 2206 ~~2207~~ 2208 2209 2210 2211 2212 2213 2214	url_parse_local(argv[nArg], 0, &urlData); if( urlData.isSsh \|\| urlData.isFile ){ Th_ErrorMessage(interp, "url must be http:// or https://", 0, 0); return TH_ERROR; } zRegexp = db_get("th1-uri-regexp", 0); if( zRegexp && zRegexp[0] ){ ~~const char zErr = re_compile(&pRe, zRegexp, 0);~~ if( zErr ){ Th_SetResult(interp, zErr, -1); return TH_ERROR; } } if( !pRe \|\| !re_match(pRe, (const unsigned char )urlData.canonical, -1) ){ Th_SetResult(interp, "url not allowed", -1);	\|	2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214	url_parse_local(argv[nArg], 0, &urlData); if( urlData.isSsh \|\| urlData.isFile ){ Th_ErrorMessage(interp, "url must be http:// or https://", 0, 0); return TH_ERROR; } zRegexp = db_get("th1-uri-regexp", 0); if( zRegexp && zRegexp[0] ){ const char zErr = fossil_re_compile(&pRe, zRegexp, 0); if( zErr ){ Th_SetResult(interp, zErr, -1); return TH_ERROR; } } if( !pRe \|\| !re_match(pRe, (const unsigned char )urlData.canonical, -1) ){ Th_SetResult(interp, "url not allowed", -1);
︙			︙