Fossil

Diff
Login

Differences From Artifact [86ac22cd75]:

To Artifact [8a76d90ae7]:


262
263
264
265
266
267
268


















269
270
271
272
273
274
275
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







** also covers /tarball and /sqlar.  If a tag has an "X" character appended,
** then it only applies if query parameters are such that the page is
** particularly difficult to compute. In all other cases, the tag should
** exactly match the page name.
**
** Change this setting "off" to disable all robot restrictions.
*/
/*
** SETTING: robot-exception              width=40 block-text
**
** The value of this setting should be a regular expression.
** If the REQUEST_URI without the SCRIPT_NAME prefix
** matches this regular expression, then the request is an exception
** to anti-robot defenses and should be allowed through.  For
** example, to allow robots to download tarballs or ZIP archives
** for named versions and releases, you could use an expression like
** this:
**
**     ^/(tarball|zip)\\b*\\b(version-|release)\\b
**
** This setting can hold multiple regular expressions, one
** regular expression per line.  The input URL is exempted from
** anti-robot defenses if any of the multiple regular expressions
** matches.
*/

/*
** Supply the built-in default for the restriction GLOB: a
** comma-separated list of page tags.  The returned string is a
** compile-time constant; callers must not modify or free it.
*/
const char *robot_restrict_default(void){
  static const char zDefault[] =
    "timelineX,diff,annotate,zip,fileage,file,finfo";
  return zDefault;
}
285
286
287
288
289
290
291







































































292
293
294
295
296
297
298
299
300
301
302
303
304
305
306




307
308
309
310
311
312
313
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+















+
+
+
+







    if( zGlob==0 ) zGlob = "";
  }
  if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
    return 0;
  }
  return glob_multi_match(zGlob,zTag);
}

/*
** Check the request URI to see if it matches one of the URI
** exceptions listed in the robot-exception setting.  Return true
** if it does.  Return false if it does not.
**
** For the purposes of this routine, the "request URI" means
** the REQUEST_URI value with the SCRIPT_NAME prefix removed and
** with QUERY_STRING appended with a "?" separator if QUERY_STRING
** is not empty.
**
** The robot-exception setting is scanned one line at a time.  Each
** line is one regular expression.  Trailing whitespace on a line is
** ignored and lines that are empty or contain only whitespace are
** skipped.  The final line does not need to end with a newline.
** An expression that fails to compile is reported using
** fossil_warning() and is otherwise ignored.  Scanning stops at the
** first expression that matches.
**
** If the robot-exception setting does not exist or is an empty
** string, then return false.
*/
int robot_exception(void){
  const char *zRE = db_get("robot-exception",0);
  const char *zQS;    /* QUERY_STRING */
  const char *zURI;   /* REQUEST_URI */
  const char *zSN;    /* SCRIPT_NAME */
  char *zRequest;     /* REQUEST_URI w/o SCRIPT_NAME prefix + QUERY_STRING */
  int nRequest;       /* Length of zRequest in bytes */
  size_t nURI, nSN;   /* Length of zURI and zSN */
  int bMatch = 0;     /* True if there is a match */

  if( zRE==0 ) return 0;
  if( zRE[0]==0 ) return 0;
  zURI = PD("REQUEST_URI","");
  nURI = strlen(zURI);
  zSN = PD("SCRIPT_NAME","");
  nSN = strlen(zSN);
  /* Only skip over the prefix if doing so stays inside of zURI */
  if( nSN<=nURI ) zURI += nSN;
  zQS = P("QUERY_STRING");
  if( zQS && zQS[0] ){
    zRequest = mprintf("%s?%s", zURI, zQS);
  }else{
    zRequest = fossil_strdup(zURI);
  }
  nRequest = (int)strlen(zRequest);
  while( zRE[0] && bMatch==0 ){
    char *z;              /* NUL-terminated copy of the current expression */
    const char *zErr;     /* Error message from re_compile(), or NULL */
    const char *zNL;      /* Newline that ends this line, or NULL */
    const char *zEnd;     /* One past the last byte of the expression */
    const char *zNext;    /* Start of the following line */
    ReCompiled *pRe;      /* The compiled expression */

    /* Find the extent of the current line.  The last line of the
    ** setting might not have a newline terminator, in which case
    ** the line runs to the end of the string. */
    zNL = strchr(zRE,'\n');
    if( zNL ){
      zEnd = zNL;
      zNext = zNL + 1;
    }else{
      zEnd = zRE + strlen(zRE);
      zNext = zEnd;
    }

    /* Trim trailing whitespace.  zEnd is an exclusive bound so that
    ** a one-character expression is not mistaken for a blank line. */
    while( zEnd>zRE && fossil_isspace(zEnd[-1]) ) zEnd--;
    if( zEnd==zRE ){
      zRE = zNext;
      continue;
    }
    z = mprintf("%.*s", (int)(zEnd - zRE), zRE);
    zRE = zNext;
    zErr = re_compile(&pRe, z, 0);
    if( zErr ){
      fossil_warning("robot-exception error \"%s\" in expression \"%s\"\n",
                     zErr, z);
      fossil_free(z);
      continue;
    }
    fossil_free(z);
    bMatch = re_match(pRe, (const unsigned char*)zRequest, nRequest);
    re_free(pRe);
  }
  fossil_free(zRequest);
  return bMatch;
}

/*
** Decide whether the page identified by zTag has to be protected by
** the anti-robot captcha for the current request.
**
** No captcha is issued, and false is returned, when any of the
** following hold (checked in this order):
**
**    *  The client has already been classified as KNOWN_NOT_ROBOT.
**    *  zTag is not on the robot-restrict list.
**    *  client_might_be_a_robot() reports false.
**    *  The request matches a robot-exception expression.  In this
**       case the client is also remembered as KNOWN_NOT_ROBOT.
**
** Otherwise the proof-of-work captcha is rendered and true is
** returned, in which case the caller should abort further page
** generation.
*/
int robot_restrict(const char *zTag){
  int bCaptcha = 0;     /* True if the captcha was rendered */

  if( robot.resultCache!=KNOWN_NOT_ROBOT
   && robot_restrict_has_tag(zTag)
   && client_might_be_a_robot()
  ){
    if( robot_exception() ){
      /* Exempted by the robot-exception setting.  Cache the result. */
      robot.resultCache = KNOWN_NOT_ROBOT;
    }else{
      /* Generate the proof-of-work captcha */
      ask_for_proof_that_client_is_not_robot();
      bCaptcha = 1;
    }
  }
  return bCaptcha;
}

/*