Check-in [87368b3efd]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Provide the ability to make exceptions to the maximum number of query parameters on the robot restrictor.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 87368b3efd0b3a5617b59f089975566896a868d133f69aed5e97919e6aa6c063
User & Date: drh 2024-11-22 14:29:46.125
Context
2024-11-23
17:22
Bug fix for allowing SSH command to be overridden once for sync operations. check-in: 4c6e394d1e user: andybradford tags: trunk
17:19
Prepare for merge into trunk. Closed-Leaf check-in: c88ed3e191 user: andybradford tags: ssh-command-once
2024-11-22
14:29
Provide the ability to make exceptions to the maximum number of query parameters on the robot restrictor. check-in: 87368b3efd user: drh tags: trunk
2024-11-21
13:34
Strengthen the file_is_canonical() routine so that it returns false on Windows if the pathname does not begin with a drive letter. check-in: f6ff25e1b7 user: drh tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/login.c.
1300
1301
1302
1303
1304
1305
1306

1307
1308
1309
1310
1311
1312
1313
1314

1315
1316










1317
1318
1319
1320
1321
1322
1323
**                      are applied if this setting is undefined or is
**                      an empty string.
*/
void login_restrict_robot_access(void){
  const char *zReferer;
  const char *zGlob;
  int isMatch = 1;

  if( g.zLogin!=0 ) return;
  zGlob = db_get("robot-restrict",0);
  if( zGlob==0 || zGlob[0]==0 ) return;
  if( g.isHuman ){
    zReferer = P("HTTP_REFERER");
    if( zReferer && zReferer[0]!=0 ) return;
  }
  if( cgi_qp_count()<1 ) return;

  isMatch = glob_multi_match(zGlob, g.zPath);
  if( !isMatch ) return;











  /* If we reach this point, it means we have a situation where we
  ** want to restrict the activity of a robot.
  */
  g.isHuman = 0;
  (void)exclude_spiders(0);
  cgi_reply();







>







|
>


>
>
>
>
>
>
>
>
>
>







1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
**                      are applied if this setting is undefined or is
**                      an empty string.
*/
void login_restrict_robot_access(void){
  const char *zReferer;
  const char *zGlob;
  int isMatch = 1;
  int nQP;  /* Number of query parameters other than name= */
  if( g.zLogin!=0 ) return;
  zGlob = db_get("robot-restrict",0);
  if( zGlob==0 || zGlob[0]==0 ) return;
  if( g.isHuman ){
    zReferer = P("HTTP_REFERER");
    if( zReferer && zReferer[0]!=0 ) return;
  }
  nQP = cgi_qp_count();
  if( nQP<1 ) return;
  isMatch = glob_multi_match(zGlob, g.zPath);
  if( !isMatch ) return;

  /* Check for exceptions to the restriction on the number of query
  ** parameters. */
  zGlob = db_get("robot-restrict-qp",0);
  if( zGlob && zGlob[0] ){
    char *zPath = mprintf("%s/%d", g.zPath, nQP);
    isMatch = glob_multi_match(zGlob, zPath);
    fossil_free(zPath);
    if( isMatch ) return;
  }

  /* If we reach this point, it means we have a situation where we
  ** want to restrict the activity of a robot.
  */
  g.isHuman = 0;
  (void)exclude_spiders(0);
  cgi_reply();
Changes to src/setup.c.
499
500
501
502
503
504
505
506
507
508











509
510
511
512
513
514
515
  @ parameters. Some robots will spend hours juggling around query parameters
  @ or even forging fake query parameters in an effort to discover new
  @ behavior or to find an SQL injection opportunity or similar.  This can
  @ waste hours of CPU time and gigabytes of bandwidth on the server.  A
  @ suggested value for this setting is:
  @ "<tt>timeline,*diff,vpatch,annotate,blame,praise,dir,tree</tt>".
  @ (Property: robot-restrict)
  @ <p>
  textarea_attribute("", 2, 80,
      "robot-restrict", "rbrestrict", "", 0);












  @ <hr>
  @ <p><input type="submit"  name="submit" value="Apply Changes"></p>
  @ </div></form>
  db_end_transaction(0);
  style_finish_page();
}







|


>
>
>
>
>
>
>
>
>
>
>







499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
  @ parameters. Some robots will spend hours juggling around query parameters
  @ or even forging fake query parameters in an effort to discover new
  @ behavior or to find an SQL injection opportunity or similar.  This can
  @ waste hours of CPU time and gigabytes of bandwidth on the server.  A
  @ suggested value for this setting is:
  @ "<tt>timeline,*diff,vpatch,annotate,blame,praise,dir,tree</tt>".
  @ (Property: robot-restrict)
  @ <br>
  textarea_attribute("", 2, 80,
      "robot-restrict", "rbrestrict", "", 0);
  @ <br> The following comma-separated GLOB pattern allows for exceptions
  @ in the maximum number of query parameters before a request is considered
  @ complex.  If this GLOB pattern exists and is non-empty and if it
  @ matches against the pagename followed by "/" and the number of query
  @ parameters, then the request is allowed through.  For example, the
  @ suggested pattern of "timeline/[012]" allows the /timeline page to
  @ pass with up to 2 query parameters besides "name".
  @ (Property: robot-restrict-qp)
  @ <br>
  textarea_attribute("", 2, 80,
      "robot-restrict-qp", "rbrestrictqp", "", 0);

  @ <hr>
  @ <p><input type="submit"  name="submit" value="Apply Changes"></p>
  @ </div></form>
  db_end_transaction(0);
  style_finish_page();
}
Changes to src/timeline.c.
1861
1862
1863
1864
1865
1866
1867

1868
1869
1870
1871
1872
1873
1874
  int disableY = 0;                   /* Disable type selector on submenu */
  int advancedMenu = 0;               /* Use the advanced menu design */
  char *zPlural;                      /* Ending for plural forms */
  int showCherrypicks = 1;            /* True to show cherrypick merges */
  int haveParameterN;                 /* True if n= query parameter present */
  int from_to_mode = 0;               /* 0: from,to. 1: from,ft 2: from,bt */


  url_initialize(&url, "timeline");
  cgi_query_parameters_to_url(&url);

  (void)P_NoBot("ss")
    /* "ss" is processed via the udc but at least one spider likes to
    ** try to SQL inject via this argument, so let's catch that. */;








>







1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
  int disableY = 0;                   /* Disable type selector on submenu */
  int advancedMenu = 0;               /* Use the advanced menu design */
  char *zPlural;                      /* Ending for plural forms */
  int showCherrypicks = 1;            /* True to show cherrypick merges */
  int haveParameterN;                 /* True if n= query parameter present */
  int from_to_mode = 0;               /* 0: from,to. 1: from,ft 2: from,bt */

  login_check_credentials();
  url_initialize(&url, "timeline");
  cgi_query_parameters_to_url(&url);

  (void)P_NoBot("ss")
    /* "ss" is processed via the udc but at least one spider likes to
    ** try to SQL inject via this argument, so let's catch that. */;

1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955

  /* To view the timeline, must have permission to read project data.
  */
  pd_rid = name_choice("dp","dp2",&zDPName);
  if( pd_rid ){
    p_rid = d_rid = pd_rid;
  }
  login_check_credentials();
  if( (!g.perm.Read && !g.perm.RdTkt && !g.perm.RdWiki && !g.perm.RdForum)
   || (bisectLocal && !g.perm.Setup)
  ){
    login_needed(g.anon.Read && g.anon.RdTkt && g.anon.RdWiki);
    return;
  }
  if( !bisectLocal ){







<







1942
1943
1944
1945
1946
1947
1948

1949
1950
1951
1952
1953
1954
1955

  /* To view the timeline, must have permission to read project data.
  */
  pd_rid = name_choice("dp","dp2",&zDPName);
  if( pd_rid ){
    p_rid = d_rid = pd_rid;
  }

  if( (!g.perm.Read && !g.perm.RdTkt && !g.perm.RdWiki && !g.perm.RdForum)
   || (bisectLocal && !g.perm.Setup)
  ){
    login_needed(g.anon.Read && g.anon.RdTkt && g.anon.RdWiki);
    return;
  }
  if( !bisectLocal ){