Fossil

Check-in [83284480a3]
Login

Check-in [83284480a3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhancements to the UserAgent bot recognizer. We discovered earlier today on the Fossil server itself that it is very important not to misclassify bots as human since a spider that downloads every possible historical annotation and tarball and zip archive and diff can really load up a server and soak up a lot of bandwidth.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 83284480a39ba70863834c7bbf1407968ad2cf5e
User & Date: drh 2011-11-25 16:11:17.566
Context
2011-11-25
18:54
Change the version number to 1.21 in preparation for the next release. Begin constructing a change log. ... (check-in: 53db205302 user: drh tags: trunk)
16:11
Enhancements to the UserAgent bot recognizer. We discovered earlier today on the Fossil server itself that it is very important not to misclassify bots as human since a spider that downloads every possible historical annotation and tarball and zip archive and diff can really load up a server and soak up a lot of bandwidth. ... (check-in: 83284480a3 user: drh tags: trunk)
09:36
Disallow the word "rawl" (as in crawler) in the user agent. ... (check-in: fe075f5d89 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/login.c.
353
354
355
356
357
358
359
360
361
362
363
364


365
366

367
368
369





370


371


372
373
374
375
376
377
378
379
      */
      ;
  }
}

/*
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
** is a manually operated browser or a bot.  When in doubt, assume a bot.
** Return true if we believe the agent is a real person.
**
** zAgent may be NULL (no User-Agent supplied), in which case 0 is returned.
**
** All substring and prefix tests use strncmp() rather than memcmp() so
** that a comparison never reads beyond the NUL terminator of zAgent when
** fewer bytes remain than the pattern is long.
*/
static int isHuman(const char *zAgent){
  int i;

  if( zAgent==0 ) return 0;

  /* Any of these substrings, anywhere in the string, marks a bot. */
  for(i=0; zAgent[i]; i++){
    if( zAgent[i]=='b' && strncmp(&zAgent[i],"bot",3)==0 ) return 0;
    if( zAgent[i]=='s' && strncmp(&zAgent[i],"spider",6)==0 ) return 0;
    /* "rawl" catches both "crawl" and "Crawl" */
    if( zAgent[i]=='r' && strncmp(&zAgent[i],"rawl",4)==0 ) return 0;
  }

  /* Only a short list of well-known browser prefixes count as human. */
  if( strncmp(zAgent, "Mozilla/", 8)==0 ){
    /* Require a major version of 4 or greater */
    return atoi(&zAgent[8])>=4;
  }
  if( strncmp(zAgent, "Opera/", 6)==0 ) return 1;
  if( strncmp(zAgent, "Safari/", 7)==0 ) return 1;
  if( strncmp(zAgent, "Lynx/", 5)==0 ) return 1;
  return 0;
}








|
|



>
>
|

>
|
|
|
>
>
>
>
>
|
>
>

>
>
|







353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
      */
      ;
  }
}

/*
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
** is a manually operated browser or a bot.  When in doubt, assume a bot.
** Return true if we believe the agent is a real person.
**
** zAgent may be NULL (no User-Agent supplied), in which case 0 is returned.
**
** Rules applied, in order:
**
**   1.  Any occurrence of "bot", "spider", "rawl" or "http" => bot.
**   2.  "Mozilla/N" with N<4 => bot.
**   3.  "Mozilla/N" that says "compatible" but never mentions "IE" => bot.
**   4.  Any other "Mozilla/", "Opera/", "Safari/" or "Lynx/" prefix => human.
**   5.  Everything else => bot.
**
** All substring and prefix tests use strncmp() so that a comparison never
** reads beyond the NUL terminator of zAgent.
*/
static int isHuman(const char *zAgent){
  int i;
  int seenCompatible = 0;
  int seenIE = 0;
  if( zAgent==0 ) return 0;  /* If no UserAgent, then probably a bot */
  for(i=0; zAgent[i]; i++){
    char c = zAgent[i];
    if( c=='b' && strncmp(&zAgent[i],"bot",3)==0 ) return 0;
    if( c=='s' && strncmp(&zAgent[i],"spider",6)==0 ) return 0;
    if( c=='r' && strncmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */
    /* Anything that puts a URL in the UserAgent string is probably a bot */
    if( c=='h' && strncmp(&zAgent[i],"http",4)==0 ) return 0;
    /* Compare exactly the 10 letters of "compatible" (not its terminating
    ** NUL) so the word is recognized in the middle of the string too. */
    if( c=='c' && strncmp(&zAgent[i],"compatible",10)==0 ){
      seenCompatible = 1;
      /* Advance to the last letter of the word; the loop's i++ then moves
      ** to the character that follows it without stepping past the NUL. */
      i += 9;
      continue;
    }
    /* zAgent[i] is non-NUL here, so zAgent[i+1] is at worst the terminator */
    if( c=='I' && zAgent[i+1]=='E' ) seenIE = 1;
  }
  if( strncmp(zAgent, "Mozilla/", 8)==0 ){
    if( atoi(&zAgent[8])<4 ) return 0;  /* Many bots advertise as Mozilla/3 */
    if( seenCompatible && !seenIE ) return 0;
    return 1;
  }
  if( strncmp(zAgent, "Opera/", 6)==0 ) return 1;
  if( strncmp(zAgent, "Safari/", 7)==0 ) return 1;
  if( strncmp(zAgent, "Lynx/", 5)==0 ) return 1;
  return 0;
}

Changes to src/style.c.
891
892
893
894
895
896
897

898
899
900
901
902
903
904
    if( login_has_capability(&c, 1) ) zCap[i++] = c;
  }
  zCap[i] = 0;
  @ g.userUid = %d(g.userUid)<br />
  @ g.zLogin = %h(g.zLogin)<br />
  @ capabilities = %s(zCap)<br />
  @ <hr>

  cgi_print_all(atoi(PD("showall","0")));
  if( g.perm.Setup ){
    const char *zRedir = P("redirect");
    if( zRedir ) cgi_redirect(zRedir);
  }
  style_footer();
}







>







891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
    if( login_has_capability(&c, 1) ) zCap[i++] = c;
  }
  zCap[i] = 0;
  @ g.userUid = %d(g.userUid)<br />
  @ g.zLogin = %h(g.zLogin)<br />
  @ capabilities = %s(zCap)<br />
  @ <hr>
  P("HTTP_USER_AGENT");
  cgi_print_all(atoi(PD("showall","0")));
  if( g.perm.Setup ){
    const char *zRedir = P("redirect");
    if( zRedir ) cgi_redirect(zRedir);
  }
  style_footer();
}