Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Incremental check-in for work on the indexed full-text search. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | indexed-fts |
| Files: | files | file ages | folders |
| SHA1: |
ec0e590191b10f23c8b3b975b1f1ca3d |
| User & Date: | drh 2015-02-02 20:53:55.664 |
Context
|
2015-02-03
| ||
| 00:27 | Document search now works with an index. Still no configuration screens for indexed search, however. full-scan search continues to work as before. check-in: 1bad221ecb user: drh tags: indexed-fts | |
|
2015-02-02
| ||
| 20:53 | Incremental check-in for work on the indexed full-text search. check-in: ec0e590191 user: drh tags: indexed-fts | |
| 15:01 | Incremental check-in: added the "fossil test-fts fill" test command. check-in: 32d904e9cf user: drh tags: indexed-fts | |
Changes
Changes to src/db.c.
| ︙ | ︙ | |||
1378 1379 1380 1381 1382 1383 1384 |
fprintf(stderr, "-- prepared statements %10d\n", db.nPrepare);
}
while( db.pAllStmt ){
db_finalize(db.pAllStmt);
}
db_end_transaction(1);
pStmt = 0;
| < < < < < > | > > > > > | 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 |
fprintf(stderr, "-- prepared statements %10d\n", db.nPrepare);
}
while( db.pAllStmt ){
db_finalize(db.pAllStmt);
}
db_end_transaction(1);
pStmt = 0;
db_close_config();
/* If the localdb (the check-out database) is open and if it has
** a lot of unused free space, then VACUUM it as we shut down.
*/
if( g.localOpen && strcmp(db_name("localdb"),"main")==0 ){
int nFree = db_int(0, "PRAGMA main.freelist_count");
int nTotal = db_int(0, "PRAGMA main.page_count");
if( nFree>nTotal/4 ){
db_multi_exec("VACUUM;");
}
}
if( g.db ){
int rc;
sqlite3_wal_checkpoint(g.db, 0);
rc = sqlite3_close(g.db);
if( rc==SQLITE_BUSY && reportErrors ){
while( (pStmt = sqlite3_next_stmt(g.db, pStmt))!=0 ){
fossil_warning("unfinalized SQL statement: [%s]", sqlite3_sql(pStmt));
}
}
g.db = 0;
g.zMainDbType = 0;
}
g.repositoryOpen = 0;
g.localOpen = 0;
assert( g.dbConfig==0 );
assert( g.useAttach==0 );
|
| ︙ | ︙ |
Changes to src/search.c.
| ︙ | ︙ | |||
451 452 453 454 455 456 457 |
** do not delete the Search object.
*/
void search_sql_setup(sqlite3 *db){
static int once = 0;
if( once++ ) return;
sqlite3_create_function(db, "score", -1, SQLITE_UTF8, 0,
search_score_sqlfunc, 0, 0);
| | | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 |
** do not delete the Search object.
*/
void search_sql_setup(sqlite3 *db){
static int once = 0;
if( once++ ) return;
sqlite3_create_function(db, "score", -1, SQLITE_UTF8, 0,
search_score_sqlfunc, 0, 0);
sqlite3_create_function(db, "fsnippet", -1, SQLITE_UTF8, &gSearch,
search_score_sqlfunc, 0, 0);
sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0,
search_init_sqlfunc, 0, 0);
sqlite3_create_function(db, "stext", 3, SQLITE_UTF8, 0,
search_stext_sqlfunc, 0, 0);
sqlite3_create_function(db, "urlencode", 1, SQLITE_UTF8, 0,
search_urlencode_sqlfunc, 0, 0);
|
| ︙ | ︙ | |||
551 552 553 554 555 556 557 |
#endif
/*
** Remove bits from srchFlags which are disallowed by either the
** current server configuration or by user permissions.
*/
unsigned int search_restrict(unsigned int srchFlags){
| > > > > | < | < | < | < < | | > > > | > | < < < < < < < < < < < > > > | | 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 |
#endif
/*
** Narrow the requested search scope to what is actually permitted and
** return the surviving bits.
**
** Categories the current user has no permission to read are removed
** first.  If a full-text index exists the result is returned at that
** point.  Otherwise each remaining category is also removed when its
** "search-*" setting is off.
*/
unsigned int search_restrict(unsigned int srchFlags){
  /* Each search category paired with the boolean setting that enables
  ** it.  The entries are listed in the order the settings are read. */
  static const struct {
    unsigned int mCategory;     /* SRCH_* bit */
    const char *zSetting;       /* Name of the enabling setting */
  } aGate[] = {
    { SRCH_CKIN, "search-ci"   },
    { SRCH_DOC,  "search-doc"  },
    { SRCH_TKT,  "search-tkt"  },
    { SRCH_WIKI, "search-wiki" },
  };
  unsigned int mAllowed = srchFlags;
  int i;

  /* Stage 1: drop whatever the user is not allowed to read. */
  if( !g.perm.Read ){
    mAllowed &= ~(SRCH_CKIN|SRCH_DOC);
  }
  if( !g.perm.RdTkt ){
    mAllowed &= ~(SRCH_TKT);
  }
  if( !g.perm.RdWiki ){
    mAllowed &= ~(SRCH_WIKI);
  }

  /* When an index exists the per-category settings are not consulted. */
  if( search_index_exists() ){
    return mAllowed;
  }

  /* Stage 2: drop categories whose setting is off.  A setting is only
  ** looked up when its category is still requested. */
  for(i=0; i<(int)(sizeof(aGate)/sizeof(aGate[0])); i++){
    if( (mAllowed & aGate[i].mCategory)==0 ) continue;
    if( !db_get_boolean(aGate[i].zSetting,0) ){
      mAllowed &= ~aGate[i].mCategory;
    }
  }
  return mAllowed;
}
/*
** When this routine is called, there already exists a table
**
** x(label,url,score,date,snip).
**
** And the srchFlags parameter has been validated. This routine
** fills the X table with search results using a full-text scan.
**
** The companion indexed scan routine is search_indexed().
*/
static void search_fullscan(
const char *zPattern, /* The query pattern */
unsigned int srchFlags /* What to search over */
){
search_init(zPattern, "<b>", "</b>", " ... ",
SRCHFLG_STATIC|SRCHFLG_HTML|SRCHFLG_SCORE);
if( (srchFlags & SRCH_DOC)!=0 ){
char *zDocGlob = db_get("doc-glob","");
char *zDocBr = db_get("doc-branch","trunk");
if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){
db_multi_exec(
"CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
);
db_multi_exec(
"INSERT INTO x(label,url,date,snip)"
" SELECT printf('Document: %%s',foci.filename),"
" printf('%R/doc/%T/%%s',foci.filename),"
" (SELECT datetime(event.mtime) FROM event"
" WHERE objid=symbolic_name_to_rid('trunk')),"
" fsnippet(stext('d',blob.rid,foci.filename))"
" FROM foci CROSS JOIN blob"
" WHERE checkinID=symbolic_name_to_rid('trunk')"
" AND blob.uuid=foci.uuid"
" AND %z",
zDocBr, glob_expr("foci.filename", zDocGlob)
);
}
|
| ︙ | ︙ | |||
626 627 628 629 630 631 632 |
" AND tagxref.tagid=tag.tagid"
" GROUP BY 1"
")"
"INSERT INTO x(label,url,date,snip)"
" SELECT printf('Wiki: %%s',name),"
" printf('%R/wiki?name=%%s',urlencode(name)),"
" datetime(mtime),"
| | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 |
" AND tagxref.tagid=tag.tagid"
" GROUP BY 1"
")"
"INSERT INTO x(label,url,date,snip)"
" SELECT printf('Wiki: %%s',name),"
" printf('%R/wiki?name=%%s',urlencode(name)),"
" datetime(mtime),"
" fsnippet(stext('w',rid,name))"
" FROM wiki;"
);
}
if( (srchFlags & SRCH_CKIN)!=0 ){
db_multi_exec(
"WITH ckin(uuid,rid,mtime) AS ("
" SELECT blob.uuid, event.objid, event.mtime"
" FROM event, blob"
" WHERE event.type='ci'"
" AND blob.rid=event.objid"
")"
"INSERT INTO x(label,url,date,snip)"
" SELECT printf('Check-in [%%.10s] on %%s',uuid,datetime(mtime)),"
" printf('%R/timeline?c=%%s&n=8&y=ci',uuid),"
" datetime(mtime),"
" fsnippet(stext('c',rid,NULL))"
" FROM ckin;"
);
}
if( (srchFlags & SRCH_TKT)!=0 ){
db_multi_exec(
"INSERT INTO x(label,url,date,snip)"
" SELECT printf('Ticket [%%.17s] on %%s',"
"tkt_uuid,datetime(tkt_mtime)),"
" printf('%R/tktview/%%.20s',tkt_uuid),"
" datetime(tkt_mtime),"
" fsnippet(stext('t',tkt_id,NULL))"
" FROM ticket;"
);
}
db_multi_exec(
"UPDATE x SET score=substr(snip,1,8), snip=substr(snip,9)"
);
}
/*
** When this routine is called, there already exists a table
**
** x(label,url,score,date,snip).
**
** And the srchFlags parameter has been validated. This routine
** fills the X table with search results using a index scan.
**
** The companion full-text scan routine is search_fullscan().
*/
static void search_indexed(
const char *zPattern, /* The query pattern */
unsigned int srchFlags /* What to search over */
){
db_multi_exec(
"INSERT INTO x(label,url,score,date,snip) "
" SELECT ftsdocs.label,"
" ftsdocs.url,"
" 1," /*FIX ME*/
" datetime(ftsdocs.mtime),"
" fsnippet(ftsidx,'<b>','</b>',' ... ')"
" FROM ftsidx, ftsdocs"
" WHERE ftsidx MATCH %Q"
" AND ftsdocs.id=ftsidx.docid",
zPattern
);
}
/*
** This routine generates web-page output for a search operation.
** Other web-pages can invoke this routine to add search results
** in the middle of the page.
**
** Return the number of rows.
*/
int search_run_and_output(
const char *zPattern, /* The query pattern */
unsigned int srchFlags /* What to search over */
){
Stmt q;
int nRow = 0;
srchFlags = search_restrict(srchFlags);
if( srchFlags==0 ) return 0;
search_sql_setup(g.db);
add_content_sql_commands(g.db);
db_multi_exec(
"CREATE TEMP TABLE x(label,url,score,date,snip);"
);
if( !search_index_exists() ){
search_fullscan(zPattern, srchFlags);
}else{
search_indexed(zPattern, srchFlags);
}
db_prepare(&q, "SELECT url, snip, label"
" FROM x"
" ORDER BY score DESC, date DESC;");
while( db_step(&q)==SQLITE_ROW ){
const char *zUrl = db_column_text(&q, 0);
const char *zSnippet = db_column_text(&q, 1);
const char *zLabel = db_column_text(&q, 2);
if( nRow==0 ){
@ <ol>
}
|
| ︙ | ︙ | |||
924 925 926 927 928 929 930 | } /* The schema for the full-text index */ static const char zFtsSchema[] = @ -- One entry for each possible search result @ CREATE TABLE IF NOT EXISTS "%w".ftsdocs( | | > > > | | | 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 | } /* The schema for the full-text index */ static const char zFtsSchema[] = @ -- One entry for each possible search result @ CREATE TABLE IF NOT EXISTS "%w".ftsdocs( @ id INTEGER PRIMARY KEY, -- Maps to the ftsidx.docid @ type CHAR(1), -- Type of document @ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document @ name TEXT, -- Additional document description @ idxed BOOLEAN, -- True if currently in the index @ label TEXT, -- Label to print on search results @ url TEXT, -- URL to access this document @ mtime DATE, -- Date when document created @ UNIQUE(type,rid) @ ); @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS @ SELECT id, type, rid, name, idxed, label, url, mtime, @ stext(type,rid,name) AS 'stext' @ FROM ftsdocs; @ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx @ USING fts4(content="ftscontent", stext); ; static const char zFtsDrop[] = @ DROP TABLE IF EXISTS "%w".ftsidx; @ DROP VIEW IF EXISTS "%w".ftscontent; |
| ︙ | ︙ | |||
1005 1006 1007 1008 1009 1010 1011 |
** updated. If the document has already been indexed, then unindex it
** now while we still have access to the old content. Add the document
** to the queue of documents that need to be indexed or reindexed.
*/
void search_doc_touch(char cType, int rid, const char *zName){
if( search_index_exists() ){
db_multi_exec(
| | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 |
** updated. If the document has already been indexed, then unindex it
** now while we still have access to the old content. Add the document
** to the queue of documents that need to be indexed or reindexed.
*/
void search_doc_touch(char cType, int rid, const char *zName){
  char zType[2];   /* cType as a NUL-terminated string */

  /* Nothing to do unless a full-text index is present. */
  if( !search_index_exists() ) return;

  /* The %Q conversion consumes a string pointer, so the single type
  ** character has to be passed as a string rather than as a bare char. */
  zType[0] = cType;
  zType[1] = 0;

  /* Remove the index entry for this document if it is currently indexed. */
  db_multi_exec(
    "DELETE FROM ftsidx WHERE docid IN"
    " (SELECT id FROM ftsdocs WHERE type=%Q AND rid=%d AND idxed)",
    zType, rid
  );

  /* Record (or re-record) the document as not yet indexed (idxed=0). */
  db_multi_exec(
    "REPLACE INTO ftsdocs(type,rid,name,idxed)"
    " VALUES(%Q,%d,%Q,0)",
    zType, rid, zName
  );
}
/*
** Bring the 'd' (document) entries of FTSDOCS and FTSIDX up to date.
**
** Work is done only when the "doc-branch" setting (default "trunk")
** resolves to a check-in and that check-in has a type 'c' row in FTSDOCS
** that is not yet indexed.  In that case:
**
**   1.  Collect the files of that check-in which match the "doc-glob"
**       setting into a temporary table CURRENT_DOCS.
**   2.  Remove from FTSIDX and FTSDOCS every 'd' entry that is not in
**       CURRENT_DOCS.
**   3.  Add FTSDOCS rows (INSERT OR IGNORE) for the CURRENT_DOCS files.
**   4.  Index every unindexed 'd' row and mark it as indexed.
*/
static void search_update_doc_index(void){
  const char *zDocBr = db_get("doc-branch","trunk");
  int iCkin = 0;        /* RID of the tip check-in of the doc branch */
  int bPending;         /* True if that check-in is still unindexed */
  double rMtime;        /* EVENT.MTIME of the check-in */
  char *zUuid;          /* First 20 characters of the check-in UUID */

  if( zDocBr!=0 ){
    iCkin = symbolic_name_to_rid(zDocBr,"ci");
  }
  if( iCkin==0 ) return;

  bPending = db_exists("SELECT 1 FROM ftsdocs WHERE type='c' AND rid=%d"
                       " AND NOT idxed", iCkin);
  if( !bPending ) return;

  /* Reaching this point means the 'd' entries need to be revised. */
  rMtime = db_double(0.0, "SELECT mtime FROM event WHERE objid=%d", iCkin);
  zUuid = db_text("","SELECT substr(uuid,1,20) FROM blob WHERE rid=%d",iCkin);

  /* Step 1: the set of documents in the check-in that match doc-glob. */
  db_multi_exec(
    "CREATE TEMP TABLE current_docs(rid INTEGER PRIMARY KEY, name);"
    "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
    "INSERT OR IGNORE INTO current_docs(rid, name)"
    " SELECT blob.rid, foci.filename FROM foci, blob"
    " WHERE foci.checkinID=%d AND blob.uuid=foci.uuid"
    " AND %z",
    iCkin, glob_expr("foci.filename", db_get("doc-glob",""))
  );

  /* Step 2: forget documents that are no longer current.  The index
  ** rows are removed first, while the FTSDOCS ids can still be found. */
  db_multi_exec(
    "DELETE FROM ftsidx WHERE docid IN"
    " (SELECT id FROM ftsdocs WHERE type='d'"
    " AND rid NOT IN (SELECT rid FROM current_docs))"
  );
  db_multi_exec(
    "DELETE FROM ftsdocs WHERE type='d'"
    " AND rid NOT IN (SELECT rid FROM current_docs)"
  );

  /* Step 3: add rows for the current documents. */
  db_multi_exec(
    "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,url,mtime)"
    " SELECT 'd', rid, name, 0,"
    " printf('Document: %%s',name),"
    " printf('/doc/%q/%%s',urlencode(name)),"
    " %.17g"
    " FROM current_docs",
    zUuid, rMtime
  );

  /* Step 4: index whatever is still unindexed, then mark it as done. */
  db_multi_exec(
    "INSERT INTO ftsidx(docid,stext)"
    " SELECT id, stext FROM ftscontent WHERE type='d' AND NOT idxed"
  );
  db_multi_exec(
    "UPDATE ftsdocs SET idxed=1 WHERE type='d' AND NOT idxed"
  );
}
/*
** Process the unindexed entries of the FTSDOCS table (those having
** FTSDOCS.IDXED=0) so that they become part of the index.
**
** This is a no-op when no full-text index exists.  Otherwise the search
** SQL functions are registered on g.db and the document ('d') entries
** are refreshed by search_update_doc_index().
*/
void search_update_index(void){
  if( search_index_exists() ){
    search_sql_setup(g.db);
    search_update_doc_index();
  }
}
/*
** COMMAND: test-fts
*/
void test_fts_cmd(void){
char *zSubCmd;
int i, n;
static const struct { int iCmd; const char *z; } aCmd[] = {
{ 1, "create" },
{ 2, "drop" },
{ 3, "exists" },
{ 4, "fill" },
{ 8, "refill" },
{ 5, "pending" },
{ 6, "all" },
{ 7, "update" },
};
db_find_and_open_repository(0, 0);
if( g.argc<3 ) usage("SUBCMD ...");
zSubCmd = g.argv[2];
n = (int)strlen(zSubCmd);
for(i=0; i<ArraySize(aCmd); i++){
if( fossil_strncmp(aCmd[i].z, zSubCmd, n)==0 ) break;
|
| ︙ | ︙ | |||
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 |
case 3: { assert( fossil_strncmp(zSubCmd, "exist", n)==0 );
fossil_print("search_index_exists() = %d\n", search_index_exists());
break;
}
case 4: { assert( fossil_strncmp(zSubCmd, "fill", n)==0 );
search_fill_index();
break;
}
case 5: { assert( fossil_strncmp(zSubCmd, "pending", n)==0 );
Stmt q;
if( !search_index_exists() ) break;
| > > > > > > | > > > | | | | > | | > > > > > > > > > > > > > | 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 |
case 3: { assert( fossil_strncmp(zSubCmd, "exist", n)==0 );
fossil_print("search_index_exists() = %d\n", search_index_exists());
break;
}
case 4: { assert( fossil_strncmp(zSubCmd, "fill", n)==0 );
search_fill_index();
break;
}
case 8: { assert( fossil_strncmp(zSubCmd, "refill", n)==0 );
search_drop_index();
search_create_index();
search_fill_index();
break;
}
case 5: { assert( fossil_strncmp(zSubCmd, "pending", n)==0 );
Stmt q;
if( !search_index_exists() ) break;
db_prepare(&q, "SELECT id, type, rid, quote(label), url, date(mtime)"
" FROM ftsdocs"
" WHERE NOT idxed");
while( db_step(&q)==SQLITE_ROW ){
const char *zUrl = db_column_text(&q,4);
if( zUrl && zUrl[0] ){
fossil_print("%6d: %s %6d %s %s\n %s\n",
db_column_int(&q, 0),
db_column_text(&q, 1),
db_column_int(&q, 2),
db_column_text(&q, 5),
db_column_text(&q, 3),
zUrl);
}else{
fossil_print("%6d: %s %6d %s %s\n",
db_column_int(&q, 0),
db_column_text(&q, 1),
db_column_int(&q, 2),
db_column_text(&q, 5),
db_column_text(&q, 3));
}
}
db_finalize(&q);
break;
}
case 6: { assert( fossil_strncmp(zSubCmd, "all", n)==0 );
Stmt q;
if( !search_index_exists() ) break;
db_prepare(&q, "SELECT id, type, rid, quote(name), idxed FROM ftsdocs");
while( db_step(&q)==SQLITE_ROW ){
fossil_print("%6d: %s %6d %s%s\n",
db_column_int(&q, 0),
db_column_text(&q, 1),
db_column_int(&q, 2),
db_column_text(&q, 3),
db_column_int(&q, 4) ? "" : " (NOT INDEXED)"
);
}
db_finalize(&q);
break;
}
case 7: { assert( fossil_strncmp(zSubCmd, "update", n)==0 );
search_update_index();
break;
}
}
db_end_transaction(0);
}
|