| ︙ | | |
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
+
-
+
+
+
+
+
+
+
+
|
/*
** A compiled search pattern together with the controls used when
** rendering snippets.  Objects are filled in by search_init() and
** released by search_end().
*/
struct Search {
int nTerm; /* Number of entries of a[] that are in use */
struct srchTerm { /* For each search term */
char *z; /* First byte of the term; the term is not NUL-terminated */
int n; /* Length of the term in bytes */
} a[SEARCH_MAX_TERM];
/* Snippet controls */
char *zPattern; /* Private copy of the search pattern */
char *zMarkBegin; /* Text emitted at the start of a match */
char *zMarkEnd; /* Text emitted at the end of a match */
char *zMarkGap; /* Text emitted for a gap between two matches */
unsigned fSrchFlg; /* Mask of SRCHFLG_* bits */
};
/*
** Bit values for Search.fSrchFlg.  Each flag is defined exactly once; a
** second definition of SRCHFLG_HTML spelled 0x0001 was an incompatible
** macro redefinition and has been dropped.
*/
#define SRCHFLG_HTML    0x01  /* Escape snippet text for HTML */
#define SRCHFLG_SCORE   0x02  /* Prepend the score to each snippet */
#define SRCHFLG_STATIC  0x04  /* The static gSearch object */
#endif
/*
** There is a single global Search object.  search_init() resets and
** reuses it when called with SRCHFLG_STATIC, search_end() clears it
** without freeing the object itself, and the SQL functions registered
** by search_sql_setup() read it.
*/
static Search gSearch;
/*
** These characters constitute a word boundary
*/
static const char isBoundary[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
| ︙ | | |
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
-
-
-
-
+
+
+
+
+
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
** True if byte x is not flagged in isBoundary[].  Masking with 0xff keeps
** the table index in the range 0..255 even when x is a negative char.
*/
#define ISALNUM(x) (!isBoundary[(x)&0xff])
/*
** Release a search context.
**
** The pattern copy and the three marker strings are freed and the
** structure is zeroed.  The structure itself is freed too, unless it is
** the global gSearch object.  A NULL pointer is ignored.
*/
void search_end(Search *p){
  if( p==0 ) return;
  fossil_free(p->zPattern);
  fossil_free(p->zMarkBegin);
  fossil_free(p->zMarkEnd);
  fossil_free(p->zMarkGap);
  memset(p, 0, sizeof(*p));
  if( p!=&gSearch ){
    fossil_free(p);
  }
}
/*
** Compile a search pattern
*/
Search *search_init(const char *zPattern){
int nPattern = strlen(zPattern);
Search *search_init(
const char *zPattern, /* The search pattern */
const char *zMarkBegin, /* Start of a match */
const char *zMarkEnd, /* End of a match */
const char *zMarkGap, /* A gap between two matches */
unsigned fSrchFlg /* Flags */
){
Search *p;
char *z;
int i;
if( fSrchFlg & SRCHFLG_STATIC ){
p = &gSearch;
search_end(p);
}else{
p = fossil_malloc( nPattern + sizeof(*p) + 1);
p = fossil_malloc(sizeof(*p));
z = (char*)&p[1];
memcpy(z, zPattern, nPattern+1);
memset(p, 0, sizeof(*p));
memset(p, 0, sizeof(*p));
}
p->zPattern = z = mprintf("%s", zPattern);
p->zMarkBegin = mprintf("%s", zMarkBegin);
p->zMarkEnd = mprintf("%s", zMarkEnd);
p->zMarkGap = mprintf("%s", zMarkGap);
p->fSrchFlg = fSrchFlg;
while( *z && p->nTerm<SEARCH_MAX_TERM ){
while( *z && !ISALNUM(*z) ){ z++; }
if( *z==0 ) break;
p->a[p->nTerm].z = z;
for(i=1; ISALNUM(z[i]); i++){}
p->a[p->nTerm].n = i;
z += i;
p->nTerm++;
}
return p;
}
/*
** Destroy a search context.
**
** NOTE(review): search_end() is also defined earlier in this file, where
** it releases the pattern and marker strings and special-cases gSearch.
** This bare free(p) version looks like the superseded definition --
** confirm, and keep only one definition of search_end().
*/
void search_end(Search *p){
free(p);
}
/*
** Append n bytes of the text zTxt to the snippet pSnip, encoding the text
** appropriately.  Nothing is appended when n<=0.
**
** With SRCHFLG_HTML set the text goes through the "%#h" conversion of
** blob_appendf(), which is passed the byte count ahead of the text
** pointer.  Otherwise the bytes are copied verbatim.
*/
static void snippet_text_append(
  Search *p,             /* The search context */
  Blob *pSnip,           /* Append to this snippet */
  const char *zTxt,      /* Text to append */
  int n                  /* How many bytes to append */
){
  if( n>0 ){
    if( p->fSrchFlg & SRCHFLG_HTML ){
      blob_appendf(pSnip, "%#h", n, zTxt);
    }else{
      blob_append(pSnip, zTxt, n);
    }
  }
}
/*
** Compare a search pattern against one or more input strings which
** collectively comprise a document. Return a match score. Optionally
** also return a "snippet".
|
| ︙ | | |
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
+
|
for(j=0; j<p->nTerm; j++) score *= anMatch[j];
if( score==0 || pSnip==0 ) return score;
/* Prepare a snippet that describes the matching text.
*/
blob_init(pSnip, 0, 0);
if( p->fSrchFlg & SRCHFLG_SCORE ) blob_appendf(pSnip, "%08x", score);
while(1){
int iOfst;
int iTail;
int iBest;
for(ii=0; ii<p->nTerm && anMatch[ii]==0; ii++){}
if( ii>=p->nTerm ) break; /* This is where the loop exits */
|
| ︙ | | |
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
|
-
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
+
+
+
-
+
+
+
+
+
+
-
+
+
-
-
+
+
+
+
+
+
|
break;
} /* end-if */
} /* end for(j) */
if( j<p->nTerm ){
while( ISALNUM(zDoc[i]) && i<iTail ){ i++; }
}
} /* end for(i) */
if( iTail>0 ) snippet_text_append(p, pSnip, zDoc, iTail);
snippet_text_append(p, pSnip, zDoc, iTail);
}
if( wantGap ) blob_append(pSnip, p->zMarkGap, -1);
return score;
}
/*
** COMMAND: test-snippet
**
** Usage: fossil test-snippet ?OPTIONS? SEARCHSTRING FILE1 FILE2 ...
**
** Score each FILE against SEARCHSTRING and, for every file with a
** non-zero score, print the snippet.
**
** Options:
**   --begin TEXT    Text marking the start of a match (default "[[")
**   --end TEXT      Text marking the end of a match (default "]]")
**   --gap TEXT      Text marking a gap between matches (default " ... ")
**   --html          Escape snippet text for HTML
**   --score         Prepend the score to each snippet
**   --static        Use the static gSearch object
*/
void test_snippet_cmd(void){
  Search *p;
  int i;
  Blob x;
  Blob snip;
  int score;
  char *zDoc;
  int flg = 0;
  char *zBegin = (char*)find_option("begin",0,1);
  char *zEnd = (char*)find_option("end",0,1);
  char *zGap = (char*)find_option("gap",0,1);

  if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML;
  if( find_option("score",0,0)!=0 ) flg |= SRCHFLG_SCORE;
  if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC;
  verify_all_options();
  if( g.argc<4 ) usage("SEARCHSTRING FILE1...");
  if( zBegin==0 ) zBegin = "[[";
  if( zEnd==0 ) zEnd = "]]";
  if( zGap==0 ) zGap = " ... ";
  /* The markers are handed to search_init(), which makes its own copies.
  ** They must not be stored directly in the Search object. */
  p = search_init(g.argv[2], zBegin, zEnd, zGap, flg);
  for(i=3; i<g.argc; i++){
    blob_read_from_file(&x, g.argv[i]);
    zDoc = blob_str(&x);
    score = search_score(p, 1, (const char**)&zDoc, &snip);
    fossil_print("%s: %d\n", g.argv[i], score);
    blob_reset(&x);
    if( score ){
      fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&snip), '=');
      blob_reset(&snip);
    }
  }
}
/*
** An SQL function to initialize the global search pattern:
**
**     search_init(PATTERN,BEGIN,END,GAP,FLAGS)
**
** All arguments are optional.  Omitted arguments keep these defaults:
** BEGIN "<b>", END "</b>", GAP " ... ", FLAGS SRCHFLG_HTML.  Arguments
** beyond the fifth are ignored.
**
** A missing or empty PATTERN clears the global search object instead of
** initializing it.
*/
static void search_init_sqlfunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  const char *zPattern = 0;
  const char *zBegin = "<b>";
  const char *zEnd = "</b>";
  const char *zGap = " ... ";
  unsigned int flg = SRCHFLG_HTML;

  /* Only touch argv[] entries that were actually supplied.  A call with
  ** no arguments must not read argv[] at all. */
  if( argc>=5 ) flg = (unsigned int)sqlite3_value_int(argv[4]);
  if( argc>=4 ) zGap = (const char*)sqlite3_value_text(argv[3]);
  if( argc>=3 ) zEnd = (const char*)sqlite3_value_text(argv[2]);
  if( argc>=2 ) zBegin = (const char*)sqlite3_value_text(argv[1]);
  if( argc>=1 ) zPattern = (const char*)sqlite3_value_text(argv[0]);

  if( zPattern && zPattern[0] ){
    search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC);
  }else{
    search_end(&gSearch);
  }
}
/*
** This is an SQLite function that scores its input using
** the pattern from the previous call to search_init().
**
** The same implementation serves two SQL functions.  When the function
** was registered with non-NULL user data it runs in snippet mode: the
** snippet text is the result, and no result is set when the score is
** zero.  With NULL user data the integer score is the result.
**
** If no pattern is active (gSearch.nTerm==0) no result is set.
*/
static void search_score_sqlfunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  int isSnippet = sqlite3_user_data(context)!=0;
  const char **azDoc;
  int score;
  int i;
  Blob snip;

  if( gSearch.nTerm==0 ) return;
  azDoc = fossil_malloc( sizeof(const char*)*(argc+1) );
  for(i=0; i<argc; i++) azDoc[i] = (const char*)sqlite3_value_text(argv[i]);
  score = search_score(&gSearch, argc, azDoc, isSnippet ? &snip : 0);
  fossil_free((void *)azDoc);
  if( isSnippet ){
    if( score ){
      /* The text buffer is handed to SQLite, which releases it with
      ** fossil_free() when done. */
      sqlite3_result_text(context, blob_materialize(&snip), -1, fossil_free);
    }
  }else{
    sqlite3_result_int(context, score);
  }
}
/*
** Register the "score()" SQL function to score its input text
** using the given Search object. Once this function is registered,
** do not delete the Search object.
*/
void search_sql_setup(Search *p){
sqlite3_create_function(g.db, "score", -1, SQLITE_UTF8, p,
void search_sql_setup(sqlite3 *db){
sqlite3_create_function(db, "score", -1, SQLITE_UTF8, 0,
search_score_sqlfunc, 0, 0);
sqlite3_create_function(db, "snippet", -1, SQLITE_UTF8, &gSearch,
search_score_sqlfunc, 0, 0);
sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0,
search_init_sqlfunc, 0, 0);
}
/*
** Testing the search function.
**
** COMMAND: search*
** %fossil search [-all|-a] [-limit|-n #] [-width|-W #] pattern...
|
| ︙ | | |
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
|
443
444
445
446
447
448
449
450
451
452
453
454
455
456
|
-
|
** all matches, regardless of their search score.
** The -limit option can be used to limit the number
** of entries returned. The -width option can be
** used to set the output width used when printing
** matches.
*/
void search_cmd(void){
Search *p;
Blob pattern;
int i;
Blob sql = empty_blob;
Stmt q;
int iBest;
char fAll = NULL != find_option("all", "a", 0); /* If set, do not lop
off the end of the
|
| ︙ | | |
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
|
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
|
-
+
-
+
|
db_must_be_within_tree();
if( g.argc<2 ) return;
blob_init(&pattern, g.argv[2], -1);
for(i=3; i<g.argc; i++){
blob_appendf(&pattern, " %s", g.argv[i]);
}
p = search_init(blob_str(&pattern));
(void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC);
blob_reset(&pattern);
search_sql_setup(p);
search_sql_setup(g.db);
db_multi_exec(
"CREATE TEMP TABLE srch(rid,uuid,date,comment,x);"
"CREATE INDEX srch_idx1 ON srch(x);"
"INSERT INTO srch(rid,uuid,date,comment,x)"
" SELECT blob.rid, uuid, datetime(event.mtime%s),"
" coalesce(ecomment,comment),"
|
| ︙ | | |
412
413
414
415
416
417
418
|
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
}
blob_append(&sql, "ORDER BY x DESC, date DESC ", -1);
db_prepare(&q, "%s", blob_sql_text(&sql));
blob_reset(&sql);
print_timeline(&q, nLimit, width, 0);
db_finalize(&q);
}
/*
** WEBPAGE: /search
**
** This is an EXPERIMENTAL page for doing search across a repository.
**
** The current implementation does a full text search over embedded
** documentation files on the tip of the "trunk" branch.  Only files
** ending in ".wiki", ".md", ".html", and ".txt" are searched.
**
** The entire text is scanned.  There is no full-text index.  This is
** experimental.  We may change to using a full-text index depending
** on performance.
**
** Query parameter "s" holds the search pattern.  Leading whitespace is
** skipped, and an empty pattern shows only the search form.
**
** Other pending enhancements:
**    *   Search tickets
**    *   Search wiki
*/
void search_page(void){
  const char *zPattern = PD("s","");
  Stmt q;

  login_check_credentials();
  if( !g.perm.Read ){ login_needed(); return; }
  style_header("Search");
  @ <form method="GET" action="search"><center>
  @ <input type="text" name="s" size="40" value="%h(zPattern)">
  @ <input type="submit" value="Search">
  @ </center></form>
  while( fossil_isspace(zPattern[0]) ) zPattern++;
  if( zPattern[0] ){
    search_sql_setup(g.db);
    add_content_sql_commands(g.db);
    /* SRCHFLG_SCORE puts the score in front of every snippet as exactly
    ** eight hex digits ("%08x"), which the query below uses for sorting. */
    search_init(zPattern, "<b>", "</b>", " ... ",
                SRCHFLG_STATIC|SRCHFLG_HTML|SRCHFLG_SCORE);
    db_multi_exec(
      "CREATE VIRTUAL TABLE temp.foci USING files_of_checkin;"
      "CREATE TEMP TABLE x(fn TEXT,url TEXT,snip TEXT);"
      "INSERT INTO x(fn,url,snip)"
      " SELECT filename, printf('%R/doc/trunk/%%s',filename),"
      " snippet(content(uuid))"
      " FROM foci"
      " WHERE checkinID=symbolic_name_to_rid('trunk')"
      " AND (filename GLOB '*.wiki' OR"
      " filename GLOB '*.md' OR"
      " filename GLOB '*.txt' OR"
      " filename GLOB '*.html');"
    );
    /* SQL substr() is 1-based.  Characters 1..8 are the score, so the
    ** snippet text proper begins at character 9. */
    db_prepare(&q, "SELECT url, substr(snip,9)"
                   " FROM x WHERE snip IS NOT NULL"
                   " ORDER BY substr(snip,1,8) DESC, fn;");
    @ <ol>
    while( db_step(&q)==SQLITE_ROW ){
      const char *zUrl = db_column_text(&q, 0);
      const char *zSnippet = db_column_text(&q, 1);
      @ <li><p>%s(href("%s",zUrl))%h(zUrl)</a><br>%s(zSnippet)</li>
    }
    db_finalize(&q);
    @ </ol>
  }
  style_footer();
}
|