Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Enhancements to the UserAgent bot recognizer. We discovered earlier today on the Fossil server itself that it is very important not to misclassify bots as human since a spider that downloads every possible historical annotation and tarball and zip archive and diff can really load up a server and soak up a lot of bandwidth. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
83284480a39ba70863834c7bbf140796 |
| User & Date: | drh 2011-11-25 16:11:17.566 |
Context
|
2011-11-25
| ||
| 18:54 | Change the version number to 1.21 in preparation for the next release. Begin constructing a change log. ... (check-in: 53db205302 user: drh tags: trunk) | |
| 16:11 | Enhancements to the UserAgent bot recognizer. We discovered earlier today on the Fossil server itself that it is very important not to misclassify bots as human since a spider that downloads every possible historical annotation and tarball and zip archive and diff can really load up a server and soak up a lot of bandwidth. ... (check-in: 83284480a3 user: drh tags: trunk) | |
| 09:36 | Disallow the word "rawl" (as in crawler) in the user agent. ... (check-in: fe075f5d89 user: drh tags: trunk) | |
Changes
Changes to src/login.c.
| ︙ | ︙ | |||
353 354 355 356 357 358 359 |
*/
;
}
}
/*
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
| | | > > | > | | | > > > > > | > > > > | | 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 |
*/
;
}
}
/*
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
** is a manually operated browser or a bot.  When in doubt, assume a bot.
**
** zAgent is the NUL-terminated User-Agent string, or NULL if the client
** did not supply one.
**
** Return true (1) if we believe the agent is a real person and false (0)
** if it looks like a bot.
**
** All substring tests use strncmp() rather than memcmp() so that the
** comparison stops at the string terminator and never reads beyond the
** end of a short User-Agent string.
*/
static int isHuman(const char *zAgent){
  int i;
  int seenCompatible = 0;   /* True if "compatible" appears anywhere */
  int seenIE = 0;           /* True if "IE" appears anywhere */

  if( zAgent==0 ) return 0;  /* If no UserAgent, then probably a bot */
  for(i=0; zAgent[i]; i++){
    const char *z = &zAgent[i];
    if( strncmp(z,"bot",3)==0 ) return 0;
    if( strncmp(z,"spider",6)==0 ) return 0;
    if( strncmp(z,"rawl",4)==0 ) return 0;   /* "crawler" */
    /* Anything that puts a URL in the UserAgent string is probably a bot */
    if( strncmp(z,"http",4)==0 ) return 0;
    if( strncmp(z,"compatible",10)==0 ){
      /* Compare exactly the 10 characters of the word.  Comparing 11
      ** bytes would include the NUL terminator and so would only match
      ** when "compatible" is the final token of the string. */
      seenCompatible = 1;
      /* Advance to the last character of the word; the loop increment
      ** then lands on the character (or terminator) that follows it. */
      i += 9;
      continue;
    }
    /* z[0] is not NUL here, so z[1] is at worst the terminator. */
    if( z[0]=='I' && z[1]=='E' ) seenIE = 1;
  }
  if( strncmp(zAgent, "Mozilla/", 8)==0 ){
    if( atoi(&zAgent[8])<4 ) return 0;  /* Many bots advertise as Mozilla/3 */
    /* Claims to be "compatible" with Mozilla but is not Internet
    ** Explorer: treat it as a bot. */
    if( seenCompatible && !seenIE ) return 0;
    return 1;
  }
  if( strncmp(zAgent, "Opera/", 6)==0 ) return 1;
  if( strncmp(zAgent, "Safari/", 7)==0 ) return 1;
  if( strncmp(zAgent, "Lynx/", 5)==0 ) return 1;
  return 0;
}
|
| ︙ | ︙ |
Changes to src/style.c.
| ︙ | ︙ | |||
891 892 893 894 895 896 897 898 899 900 901 902 903 904 |
if( login_has_capability(&c, 1) ) zCap[i++] = c;
}
zCap[i] = 0;
@ g.userUid = %d(g.userUid)<br />
@ g.zLogin = %h(g.zLogin)<br />
@ capabilities = %s(zCap)<br />
@ <hr>
cgi_print_all(atoi(PD("showall","0")));
if( g.perm.Setup ){
const char *zRedir = P("redirect");
if( zRedir ) cgi_redirect(zRedir);
}
style_footer();
}
| > | 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 |
if( login_has_capability(&c, 1) ) zCap[i++] = c;
}
zCap[i] = 0;
@ g.userUid = %d(g.userUid)<br />
@ g.zLogin = %h(g.zLogin)<br />
@ capabilities = %s(zCap)<br />
@ <hr>
P("HTTP_USER_AGENT");
cgi_print_all(atoi(PD("showall","0")));
if( g.perm.Setup ){
const char *zRedir = P("redirect");
if( zRedir ) cgi_redirect(zRedir);
}
style_footer();
}
|