Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Add the robot-exception setting. |
|---|---|
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA3-256: |
86b6ef7fe3424284a38f9706482c58b4 |
| User & Date: | drh 2025-08-21 14:08:29.274 |
Context
|
2025-08-21
| ||
| 14:13 | Add /reports to the default robot-restrict setting. check-in: 12d871a00a user: stephan tags: trunk | |
| 14:08 | Add the robot-exception setting. check-in: 86b6ef7fe3 user: drh tags: trunk | |
| 12:02 | Change [3710202914] to call the function to load the diff-related JS code even for blocked diffs. By default, the loader function is already a no-op if diffs are blocked, so the behavior intended by [3710202914] is retained. But other branches are patching the loader function because they rely on the JS code even if the diffs are hidden. check-in: 171127fd14 user: florian tags: trunk | |
Changes
Changes to src/regexp.c.
| ︙ | ︙ | |||
848 849 850 851 852 853 854 855 856 857 858 859 |
** Usage: %fossil test-grep REGEXP [FILE...]
**
** Run a regular expression match over the named disk files, or against
** standard input if no disk files are named on the command-line.
**
** Options:
** -i|--ignore-case Ignore case
*/
void re_test_grep(void){
ReCompiled *pRe;
const char *zErr;
int ignoreCase = find_option("ignore-case","i",0)!=0;
| > > > > > > > > > > > > | | | | > | | | 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 |
** Usage: %fossil test-grep REGEXP [FILE...]
**
** Run a regular expression match over the named disk files, or against
** standard input if no disk files are named on the command-line.
**
** Options:
** -i|--ignore-case Ignore case
** --robot-exception Use the robot-exception setting as the REGEXP
*/
void re_test_grep(void){
ReCompiled *pRe;
const char *zErr;
int iFileList = 3;
int ignoreCase = find_option("ignore-case","i",0)!=0;
int bRobot = find_option("robot-exception",0,0)!=0;
if( bRobot ){
const char *zRe;
db_find_and_open_repository(0,0);
verify_all_options();
zRe = db_get("robot-exception","^$");
zErr = re_compile(&pRe, zRe, ignoreCase);
iFileList = 2;
}else{
verify_all_options();
if( g.argc<3 ){
usage("REGEXP [FILE...]");
}
zErr = re_compile(&pRe, g.argv[2], ignoreCase);
}
if( zErr ) fossil_fatal("%s", zErr);
if( g.argc==iFileList ){
grep_file(pRe, "-", stdin);
}else{
int i;
for(i=iFileList; i<g.argc; i++){
FILE *in = fossil_fopen(g.argv[i], "rb");
if( in==0 ){
fossil_warning("cannot open \"%s\"", g.argv[i]);
}else{
grep_file(pRe, g.argv[i], in);
fclose(in);
}
|
| ︙ | ︙ |
Changes to src/robot.c.
| ︙ | ︙ | |||
262 263 264 265 266 267 268 269 270 271 272 273 274 275 |
** also covers /tarball and /sqlar. If a tag has an "X" character appended,
** then it only applies if query parameters are such that the page is
** particularly difficult to compute. In all other case, the tag should
** exactly match the page name.
**
** Change this setting "off" to disable all robot restrictions.
*/
/*
** Return the default restriction GLOB
*/
const char *robot_restrict_default(void){
return "timelineX,diff,annotate,zip,fileage,file,finfo";
}
| > > > > > > > > > > > > > > > > > > | 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
** also covers /tarball and /sqlar. If a tag has an "X" character appended,
** then it only applies if query parameters are such that the page is
** particularly difficult to compute. In all other case, the tag should
** exactly match the page name.
**
** Change this setting "off" to disable all robot restrictions.
*/
/*
** SETTING: robot-exception width=40 block-text
**
** The value of this setting should be a regular expression.
** If it matches the REQUEST_URI without the SCRIPT_NAME prefix
** matches this regular expression, then the request is an exception
** to anti-robot defenses and should be allowed through. For
** example, to allow robots to download tarballs or ZIP archives
** for named versions and releases, you could use an expression like
** this:
**
** ^/(tarball|zip)\\b*\\b(version-|release)\\b
**
** This setting can hold multiple regular expressions, one
** regular expression per line. The input URL is exempted from
** anti-robot defenses if any of the multiple regular expressions
** matches.
*/
/*
** Return the default restriction GLOB
*/
const char *robot_restrict_default(void){
return "timelineX,diff,annotate,zip,fileage,file,finfo";
}
|
| ︙ | ︙ | |||
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 |
if( zGlob==0 ) zGlob = "";
}
if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
return 0;
}
return glob_multi_match(zGlob,zTag);
}
/*
** Check to see if the page named in the argument is on the
** robot-restrict list. If it is on the list and if the user
** is "nobody" then bring up a captcha to test to make sure that
** client is not a robot.
**
** This routine returns true if a captcha was rendered and if subsequent
** page generation should be aborted. It returns false if the page
** should not be restricted and should be rendered normally.
*/
int robot_restrict(const char *zTag){
if( robot.resultCache==KNOWN_NOT_ROBOT ) return 0;
if( !robot_restrict_has_tag(zTag) ) return 0;
if( !client_might_be_a_robot() ) return 0;
/* Generate the proof-of-work captcha */
ask_for_proof_that_client_is_not_robot();
return 1;
}
/*
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
if( zGlob==0 ) zGlob = "";
}
if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
return 0;
}
return glob_multi_match(zGlob,zTag);
}
/*
** Check the request URI to see if it matches one of the URI
** exceptions listed in the robot-exception setting. Return true
** if it does. Return false if it does not.
**
** For the purposes of this routine, the "request URI" means
** the REQUEST_URI value with the SCRIPT_NAME prefix removed and
** with QUERY_STRING appended with a "?" separator if QUERY_STRING
** is not empty.
**
** If the robot-exception setting does not exist or is an empty
** string, then return false.
*/
int robot_exception(void){
const char *zRE = db_get("robot-exception",0);
const char *zQS; /* QUERY_STRING */
const char *zURI; /* REQUEST_URI */
const char *zSN; /* SCRIPT_NAME */
const char *zNL; /* Next newline character */
char *zRequest; /* REQUEST_URL w/o SCRIPT_NAME prefix + QUERY_STRING */
int nRequest; /* Length of zRequest in bytes */
size_t nURI, nSN; /* Length of zURI and zSN */
int bMatch = 0; /* True if there is a match */
if( zRE==0 ) return 0;
if( zRE[0]==0 ) return 0;
zURI = PD("REQUEST_URI","");
nURI = strlen(zURI);
zSN = PD("SCRIPT_NAME","");
nSN = strlen(zSN);
if( nSN<=nURI ) zURI += nSN;
zQS = P("QUERY_STRING");
if( zQS && zQS[0] ){
zRequest = mprintf("%s?%s", zURI, zQS);
}else{
zRequest = fossil_strdup(zURI);
}
nRequest = (int)strlen(zRequest);
while( zRE[0] && bMatch==0 ){
char *z;
const char *zErr;
size_t n;
ReCompiled *pRe;
zNL = strchr(zRE,'\n');
if( zNL ){
n = (size_t)(zNL - zRE)+1;
while( zNL>zRE && fossil_isspace(zNL[0]) ) zNL--;
if( zNL==zRE ){
zRE += n;
continue;
}
}else{
n = strlen(zRE);
}
z = mprintf("%.*s", (int)(zNL - zRE)+1, zRE);
zRE += n;
zErr = re_compile(&pRe, z, 0);
if( zErr ){
fossil_warning("robot-exception error \"%s\" in expression \"%s\"\n",
zErr, z);
fossil_free(z);
continue;
}
fossil_free(z);
bMatch = re_match(pRe, (const unsigned char*)zRequest, nRequest);
re_free(pRe);
}
fossil_free(zRequest);
return bMatch;
}
/*
** Check to see if the page named in the argument is on the
** robot-restrict list. If it is on the list and if the user
** is "nobody" then bring up a captcha to test to make sure that
** client is not a robot.
**
** This routine returns true if a captcha was rendered and if subsequent
** page generation should be aborted. It returns false if the page
** should not be restricted and should be rendered normally.
*/
int robot_restrict(const char *zTag){
if( robot.resultCache==KNOWN_NOT_ROBOT ) return 0;
if( !robot_restrict_has_tag(zTag) ) return 0;
if( !client_might_be_a_robot() ) return 0;
if( robot_exception() ){
robot.resultCache = KNOWN_NOT_ROBOT;
return 0;
}
/* Generate the proof-of-work captcha */
ask_for_proof_that_client_is_not_robot();
return 1;
}
/*
|
| ︙ | ︙ |
Changes to src/setup.c.
| ︙ | ︙ | |||
475 476 477 478 479 480 481 | @ <p>The settings on this page are intended to help site administrators @ defend the site against robots. @ @ <form action="%R/setup_robot" method="post"><div> login_insert_csrf_secret(); @ <input type="submit" name="submit" value="Apply Changes"></p> @ <hr> | | | > > > > > > > > > > > > > > > | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 |
@ <p>The settings on this page are intended to help site administrators
@ defend the site against robots.
@
@ <form action="%R/setup_robot" method="post"><div>
login_insert_csrf_secret();
@ <input type="submit" name="submit" value="Apply Changes"></p>
@ <hr>
@ <p><b>Do not allow robots access to these pages.</b><br>
@ If the page name matches the GLOB pattern of this setting, and the
@ users is "nobody", and the client has not previously passed a captcha
@ test to show that it is not a robot, then the page is not displayed.
@ A captcha test is is rendered instead.
@ The recommended value for this setting is:
@ <p>
@    <tt>%h(robot_restrict_default())</tt>
@ <p>
@ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
@ /vpatch. The "annotate" tag covers /annotate and also /blame and
@ /praise. The "zip" covers itself and also /tarball and /sqlar. If a
@ tag has an "X" character appended, then it only applies if query
@ parameters are such that the page is particularly difficult to compute.
@ In all other case, the tag should exactly match the page name.
@
@ To disable robot restrictions, change this setting to "off".
@ (Property: robot-restrict)
@ <br>
textarea_attribute("", 2, 80,
"robot-restrict", "rbrestrict", robot_restrict_default(), 0);
@ <hr>
@ <p><b>Exceptions to anti-robot restrictions</b><br>
@ The entry below is a list of regular expressions, one per line.
@ If any of these regular expressions match the input URL, then the
@ request is exempt from anti-robot defenses. Use this, for example,
@ to allow scripts to download release tarballs using a pattern
@ like:</p>
@ <p>
@   <tt>^/tarball\\b*\\b(version-|release)\\b</tt>
@ <p>The pattern should match against the REQUEST_URI with the
@ SCRIPT_NAME prefix removed, and with QUERY_STRING appended following
@ a "?" if QUERY_STRING exists. (Property: robot-exception)<br>
textarea_attribute("", 3, 80,
"robot-exception", "rbexcept", "", 0);
@ <hr>
addAutoHyperlinkSettings();
@ <hr>
entry_attribute("Anonymous Login Validity", 11, "anon-cookie-lifespan",
"anoncookls", "840", 0);
@ <p>The number of minutes for which an anonymous login cookie is valid.
|
| ︙ | ︙ |