Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Enabled indexed search with separate title and body and with the option to use the Porter stemmer. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | search-enhancements |
| Files: | files | file ages | folders |
| SHA1: |
71295a98b7bce92bb20158f529021ef8 |
| User & Date: | drh 2015-02-14 00:37:23.617 |
Context
|
2015-02-14
| ||
| 02:12 | Improvements to the ranking function. Add the undocumented "debug" query parameter to /search. Closed-Leaf check-in: 9f67861aed user: drh tags: search-enhancements | |
| 00:37 | Enabled indexed search with separate title and body and with the option to use the Porter stemmer. check-in: 71295a98b7 user: drh tags: search-enhancements | |
|
2015-02-13
| ||
| 23:43 | Show document, ticket, and wiki titles on the result page of unindexed search. check-in: 0e77f1fbc0 user: drh tags: search-enhancements | |
Changes
Changes to src/db.c.
| ︙ | ︙ | |||
63 64 65 66 67 68 69 |
/*
** Call this routine when a database error occurs.
*/
static void db_err(const char *zFormat, ...){
va_list ap;
char *z;
int rc = 1;
| < < < < | < | | 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
/*
** Call this routine when a database error occurs.
**
** The printf-style arguments (zFormat, ...) are formatted into a single
** message string, which is then reported through exactly one channel,
** chosen in this order:
**
**   1.  g.json.isJsonMode (only in FOSSIL_ENABLE_JSON builds): the message
**       is handed to json_err().
**   2.  g.xferPanic: the CGI content is reset and replaced with the
**       "error Database error: ..." template line below.
**   3.  g.cgiOutput: an HTML "Database Error" heading and paragraph.
**   4.  Otherwise: "<argv[0]>: <message>" is written to stderr.
**
** Afterwards the message is freed, db_force_rollback() is invoked and
** fossil_exit(rc) is called.  rc is 1 in every case except JSON mode
** while serving HTTP, where it is 0.
*/
static void db_err(const char *zFormat, ...){
va_list ap;
char *z;
int rc = 1;
/* Render the caller's format and arguments into a heap string */
va_start(ap, zFormat);
z = vmprintf(zFormat, ap);
va_end(ap);
#ifdef FOSSIL_ENABLE_JSON
if( g.json.isJsonMode ){
json_err( 0, z, 1 );
if( g.isHTTP ){
rc = 0 /* avoid HTTP 500 */;
}
}
else
/* NOTE: the "else" above pairs with the "if" that follows the #endif, so
** the chain below is skipped in JSON mode and is a plain if/else-if chain
** when FOSSIL_ENABLE_JSON is not defined. */
#endif /* FOSSIL_ENABLE_JSON */
if( g.xferPanic ){
/* Discard whatever reply was pending and send only the error line */
cgi_reset_content();
@ error Database\serror:\s%F(z)
cgi_reply();
}
else if( g.cgiOutput ){
/* Clear the flag before emitting the HTML error page */
g.cgiOutput = 0;
cgi_printf("<h1>Database Error</h1>\n<p>%h</p>\n", z);
cgi_reply();
}else{
fprintf(stderr, "%s: %s\n", g.argv[0], z);
}
free(z);
db_force_rollback();
fossil_exit(rc);
}
/*
|
| ︙ | ︙ |
Changes to src/search.c.
| ︙ | ︙ | |||
728 729 730 731 732 733 734 735 |
static void search_rank_sqlfunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
int nVal = sqlite3_value_bytes(argv[0])/4;
int nTerm; /* Number of search terms in the query */
| > | > | < > > > > > | | | | | | | > > | 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 |
static void search_rank_sqlfunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
int nVal = sqlite3_value_bytes(argv[0])/4;
int nCol; /* Number of columns in the index */
int nTerm; /* Number of search terms in the query */
int i, j; /* Loop counter */
double r = 1.0; /* Score */
const unsigned *aX, *aS;
if( nVal<2 ) return;
nTerm = aVal[0];
nCol = aVal[1];
if( nVal<2+3*nCol*nTerm+4*nCol ) return;
aS = aVal+2;
aX = aS+nCol;
for(j=0; j<nCol; j++){
r *= 1<<((30*(aS[j]-1))/nTerm);
for(i=0; i<nTerm; i++){
int hits_this_row = aX[j + i*nCol];
int hits_all_rows = aX[j + i*nCol + 1];
int rows_with_hit = aX[j + i*nCol + 2];
double avg_hits_per_row = (double)hits_all_rows/(double)rows_with_hit;
r *= hits_this_row/avg_hits_per_row;
}
r *= 2.0;
}
#define SEARCH_DEBUG_RANK 0
#if SEARCH_DEBUG_RANK
{
Blob x;
blob_init(&x,0,0);
blob_appendf(&x,"%08x", (int)r);
|
| ︙ | ︙ | |||
1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 | @ type CHAR(1), -- Type of document @ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document @ name TEXT, -- Additional document description @ idxed BOOLEAN, -- True if currently in the index @ label TEXT, -- Label to print on search results @ url TEXT, -- URL to access this document @ mtime DATE, -- Date when document created @ UNIQUE(type,rid) @ ); @ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0; @ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w'; @ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS @ SELECT rowid, type, rid, name, idxed, label, url, mtime, | > | | > > | | | 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 |
@ type CHAR(1), -- Type of document
@ rid INTEGER, -- BLOB.RID or TAG.TAGID for the document
@ name TEXT, -- Additional document description
@ idxed BOOLEAN, -- True if currently in the index
@ label TEXT, -- Label to print on search results
@ url TEXT, -- URL to access this document
@ mtime DATE, -- Date when document created
@ bx TEXT, -- Temporary "body" content cache
@ UNIQUE(type,rid)
@ );
@ CREATE INDEX "%w".ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
@ CREATE INDEX "%w".ftsdocName ON ftsdocs(name) WHERE type='w';
@ CREATE VIEW IF NOT EXISTS "%w".ftscontent AS
@ SELECT rowid, type, rid, name, idxed, label, url, mtime,
@ title(type,rid,name) AS 'title', body(type,rid,name) AS 'body'
@ FROM ftsdocs;
@ CREATE VIRTUAL TABLE IF NOT EXISTS "%w".ftsidx
@ USING fts4(content="ftscontent", title, body%s);
;
/*
** SQL template that removes the full-text search objects: the ftsidx
** table, the ftscontent view and the ftsdocs table, in that order.
** Each of the three "%w" placeholders receives the repository database
** name when search_drop_index() runs this text through db_multi_exec().
*/
static const char zFtsDrop[] =
@ DROP TABLE IF EXISTS "%w".ftsidx;
@ DROP VIEW IF EXISTS "%w".ftscontent;
@ DROP TABLE IF EXISTS "%w".ftsdocs;
;
/*
** Record of the most recent index operation performed by this file:
** starts at -1, becomes 1 after search_create_index() and 0 after
** search_drop_index().
*/
static int searchIdxExists = -1;

/*
** Create the tables, indexes, view and FTS4 virtual table that make up
** the full-text index, inside the "repository" database.
**
** When the "search-stemmer" setting is true the virtual table is created
** with the extra ",tokenize=porter" argument; otherwise no extra argument
** is appended.
*/
void search_create_index(void){
  const char *zRepo = db_name("repository");
  const char *zTokenizer = "";
  if( db_get_boolean("search-stemmer",0) ){
    zTokenizer = ",tokenize=porter";
  }
  search_sql_setup(g.db);
  db_multi_exec(zFtsSchema/*works-like:"%w%w%w%w%w%s"*/,
                zRepo, zRepo, zRepo, zRepo, zRepo,
                zTokenizer/*safe-for-%s*/);
  searchIdxExists = 1;
}
/*
** Drop the full-text index objects (ftsidx, ftscontent, ftsdocs) from the
** "repository" database and record that the index no longer exists.
*/
void search_drop_index(void){
  const char *zRepo = db_name("repository");
  db_multi_exec(zFtsDrop/*works-like:"%w%w%w"*/, zRepo, zRepo, zRepo);
  searchIdxExists = 0;
}
|
| ︙ | ︙ | |||
1439 1440 1441 1442 1443 1444 1445 |
" AND rid NOT IN (SELECT rid FROM current_docs))"
);
db_multi_exec(
"DELETE FROM ftsdocs WHERE type='d'"
" AND rid NOT IN (SELECT rid FROM current_docs)"
);
db_multi_exec(
| | > | | | | > > > | | | | > | | | | 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 |
" AND rid NOT IN (SELECT rid FROM current_docs))"
);
db_multi_exec(
"DELETE FROM ftsdocs WHERE type='d'"
" AND rid NOT IN (SELECT rid FROM current_docs)"
);
db_multi_exec(
"INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)"
" SELECT 'd', rid, name, 0,"
" 'Document: '||title('d',rid,name),"
" body('d',rid,name),"
" printf('/doc/%q/%%s',urlencode(name)),"
" %.17g"
" FROM current_docs",
zBrUuid, rTime
);
db_multi_exec(
"INSERT INTO ftsidx(docid,title,body)"
" SELECT rowid, name, bx FROM ftsdocs WHERE type='d' AND NOT idxed"
);
db_multi_exec(
"UPDATE ftsdocs SET"
" idxed=1,"
" bx=NULL"
" WHERE type='d' AND NOT idxed"
);
}
/*
** Deal with all of the unindexed 'c' (check-in) rows in FTSDOCS.
**
** Step 1 adds every such row to FTSIDX, using an empty title and the
** result of body('c',rid,NULL) as the body.
**
** Step 2 rewrites those same FTSDOCS rows with idxed=1 and with label,
** url and mtime filled in from the matching EVENT and BLOB rows.
**
** If step 1 added nothing then there are no unindexed check-in rows for
** step 2 to finalize, so return early.  This mirrors the early return in
** the ticket and wiki variants of this routine.
*/
static void search_update_checkin_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(docid,title,body)"
    " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
    " WHERE type='c' AND NOT idxed;"
  );
  /* Must be tested immediately after the INSERT it refers to */
  if( db_changes()==0 ) return;
  db_multi_exec(
    "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
    " SELECT ftsdocs.rowid, 1, 'c', ftsdocs.rid, NULL,"
    " printf('Check-in [%%.16s] on %%s',blob.uuid,datetime(event.mtime)),"
    " printf('/timeline?y=ci&c=%%.20s',blob.uuid),"
    " event.mtime"
    " FROM ftsdocs, event, blob"
    " WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed"
    " AND event.objid=ftsdocs.rid"
    " AND blob.rid=ftsdocs.rid"
  );
}
/*
** Deal with all of the unindexed 't' terms in FTSDOCS
**
** Every FTSDOCS row of type 't' that is not yet indexed is first added to
** FTSIDX with title('t',rid,NULL) and body('t',rid,NULL).  If that added
** nothing the routine returns.  Otherwise the same rows are rewritten with
** idxed=1 and with label, url and mtime taken from the matching TICKET row.
*/
static void search_update_ticket_index(void){
/* Step 1: feed title and body of each unindexed ticket into the index */
db_multi_exec(
"INSERT INTO ftsidx(docid,title,body)"
" SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs"
" WHERE type='t' AND NOT idxed;"
);
/* Nothing was added, so there is nothing to mark as indexed.  This check
** has to stay directly after the INSERT whose change count it reads. */
if( db_changes()==0 ) return;
/* Step 2: mark those rows indexed and fill in label, url and mtime.  The
** doubled "%%" keeps the percent signs for the SQL printf() calls. */
db_multi_exec(
"REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
" SELECT ftsdocs.rowid, 1, 't', ftsdocs.rid, NULL,"
" printf('Ticket: %%s (%%s)',title('t',tkt_id,null),"
" datetime(tkt_mtime)),"
" printf('/tktview/%%.20s',tkt_uuid),"
" tkt_mtime"
" FROM ftsdocs, ticket"
" WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed"
" AND ticket.tkt_id=ftsdocs.rid"
);
}
/*
** Deal with all of the unindexed 'w' terms in FTSDOCS
*/
static void search_update_wiki_index(void){
db_multi_exec(
"INSERT INTO ftsidx(docid,title,body)"
" SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs"
" WHERE type='w' AND NOT idxed;"
);
if( db_changes()==0 ) return;
db_multi_exec(
"REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
" SELECT ftsdocs.rowid, 1, 'w', ftsdocs.rid, ftsdocs.name,"
" 'Wiki: '||ftsdocs.name,"
|
| ︙ | ︙ | |||
1563 1564 1565 1566 1567 1568 1569 | ** COMMAND: fts-config* ** ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? ** ** The "fossil fts-config" command configures the full-text search capabilities ** of the repository. Subcommands: ** | | | > > > > | | | | | 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 |
** COMMAND: fts-config*
**
** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT?
**
** The "fossil fts-config" command configures the full-text search capabilities
** of the repository. Subcommands:
**
** reindex Rebuild the search index. This is a no-op if
** index search is disabled
**
** index (on|off) Turn the search index on or off
**
** enable cdtw Enable various kinds of search. c=Check-ins,
** d=Documents, t=Tickets, w=Wiki.
**
** disable cdtw Disable various kinds of search
**
** stemmer (on|off) Turn the Porter stemmer on or off for indexed
** search. (Unindexed search is never stemmed.)
**
** The current search settings are displayed after any changes are applied.
** Run this command with no arguments to simply see the settings.
*/
void test_fts_cmd(void){
static const struct { int iCmd; const char *z; } aCmd[] = {
{ 1, "reindex" },
{ 2, "index" },
{ 3, "disable" },
{ 4, "enable" },
{ 5, "stemmer" },
};
static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
{ "search-ckin", "check-in search:", "c" },
{ "search-doc", "document search:", "d" },
{ "search-tkt", "ticket search:", "t" },
{ "search-wiki", "wiki search:", "w" },
};
char *zSubCmd;
int i, j, n;
int iCmd = 0;
int iAction = 0;
db_find_and_open_repository(0, 0);
if( g.argc>2 ){
|
| ︙ | ︙ | |||
1611 1612 1613 1614 1615 1616 1617 |
fossil_fatal("unknown \"%s\" - should be on of:%s",
zSubCmd, blob_str(&all));
return;
}
iCmd = aCmd[i].iCmd;
}
if( iCmd==1 ){
| | | > > > > > > > | 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 |
fossil_fatal("unknown \"%s\" - should be on of:%s",
zSubCmd, blob_str(&all));
return;
}
iCmd = aCmd[i].iCmd;
}
/*
** Remainder of the fts-config command: turn the selected subcommand into
** an index action (iAction: 0 = leave alone, 1 = drop, 2 = drop and
** rebuild), apply any setting changes, carry out the action, and finally
** print the current search configuration.
*/
if( iCmd==1 ){
  /* "reindex": rebuild, but only if an index currently exists */
  if( search_index_exists() ) iAction = 2;
}
if( iCmd==2 ){
  /* "index (on|off)": the on/off word is g.argv[3], so four arguments
  ** must be present before it is read. */
  if( g.argc<4 ) usage("index (on|off)");
  iAction = 1 + is_truth(g.argv[3]);
}
db_begin_transaction();

/* Adjust search settings */
if( iCmd==3 || iCmd==4 ){
  const char *zCtrl;
  if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd));
  zCtrl = g.argv[3];
  for(j=0; j<ArraySize(aSetng); j++){
    if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){
      /* iCmd-3 is 0 for "disable" (3) and 1 for "enable" (4) */
      db_set_int(aSetng[j].zSetting, iCmd-3, 0);
    }
  }
}
if( iCmd==5 ){
  /* "stemmer (on|off)": usage text names the subcommand as it is typed */
  if( g.argc<4 ) usage("stemmer (on|off)");
  db_set_int("search-stemmer", is_truth(g.argv[3]), 0);
}

/* destroy or rebuild the index, if requested */
if( iAction>=1 ){
  search_drop_index();
}
if( iAction>=2 ){
  search_rebuild_index();
}

/* Always show the status before ending */
for(i=0; i<ArraySize(aSetng); i++){
  fossil_print("%-16s %s\n", aSetng[i].zName,
     db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
}
fossil_print("%-16s %s\n", "Porter stemmer:",
     db_get_boolean("search-stemmer",0) ? "on" : "off");
if( search_index_exists() ){
  fossil_print("%-16s enabled\n", "full-text index:");
  fossil_print("%-16s %d\n", "documents:",
     db_int(0, "SELECT count(*) FROM ftsdocs"));
}else{
  fossil_print("%-16s disabled\n", "full-text index:");
}
db_end_transaction(0);
}
|
Changes to src/setup.c.
| ︙ | ︙ | |||
2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 |
search_create_index();
search_fill_index();
search_update_index(search_restrict(SRCH_ALL));
}
if( search_index_exists() ){
@ <p>Currently using an SQLite FTS4 search index. This makes search
@ run faster, especially on large repositories, but takes up space.</p>
@ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
@ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
}else{
@ <p>The SQLite FTS4 search index is disabled. All searching will be
@ a full-text scan. This usually works fine, but can be slow for
@ larger repositories.</p>
@ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
}
@ </div></form>
style_footer();
}
| > > | 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 |
search_create_index();
search_fill_index();
search_update_index(search_restrict(SRCH_ALL));
}
if( search_index_exists() ){
@ <p>Currently using an SQLite FTS4 search index. This makes search
@ run faster, especially on large repositories, but takes up space.</p>
onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
@ <p><input type="submit" name="fts0" value="Delete The Full-Text Index">
@ <input type="submit" name="fts1" value="Rebuild The Full-Text Index">
}else{
@ <p>The SQLite FTS4 search index is disabled. All searching will be
@ a full-text scan. This usually works fine, but can be slow for
@ larger repositories.</p>
onoff_attribute("Use Porter Stemmer","search-stemmer","ss",0,0);
@ <p><input type="submit" name="fts1" value="Create A Full-Text Index">
}
@ </div></form>
style_footer();
}
|