Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
| Comment: | Add fts-config tokenizer unicode61 option. Prompted by [forum:a4bfcff66548a1ff|forum post a4bfcff66548a1ff]. |
|---|---|
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA3-256: |
e180dbb4559d5c85b6d293ab12dbb64c |
| User & Date: | stephan 2023-08-18 12:17:38.981 |
| Original Comment: | Add fts-config tokenizer unicode61 option. Prompted by [forum post a4bfcff66548a1ff|forum:a4bfcff66548a1ff]. |
Context
|
2023-08-18
| ||
| 13:03 | Added "unicode61" to search setup usage message check-in: 9965e1d86f user: wyoung tags: trunk | |
| 12:17 | Add fts-config tokenizer unicode61 option. Prompted by [forum:a4bfcff66548a1ff|forum post a4bfcff66548a1ff]. check-in: e180dbb455 user: stephan tags: trunk | |
|
2023-08-14
| ||
| 21:09 | Make sure the EmailEvent object is completely zeroed whenever it is allocated. check-in: 33877fa50b user: drh tags: trunk | |
Changes
Changes to src/search.c.
| ︙ | ︙ | |||
1549 1550 1551 1552 1553 1554 1555 | @ DROP TABLE IF EXISTS repository.ftsdocs; ; #if INTERFACE /* ** Values for the search-tokenizer config option. */ | | | > | | 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 | @ DROP TABLE IF EXISTS repository.ftsdocs; ; #if INTERFACE /* ** Values for the search-tokenizer config option. */ #define FTS5TOK_NONE 0 /* disabled */ #define FTS5TOK_PORTER 1 /* porter stemmer */ #define FTS5TOK_UNICODE61 2 /* unicode61 tokenizer */ #define FTS5TOK_TRIGRAM 3 /* trigram tokenizer */ #endif /* ** Cached FTS5TOK_xyz value for search_tokenizer_type() and ** friends. */ static int iFtsTokenizer = -1; |
| ︙ | ︙ | |||
1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 |
return iFtsTokenizer;
}
z = db_get("search-tokenizer",0);
if( 0==z ){
iFtsTokenizer = FTS5TOK_NONE;
}else if(0==fossil_strcmp(z,"porter")){
iFtsTokenizer = FTS5TOK_PORTER;
}else if(0==fossil_strcmp(z,"trigram")){
iFtsTokenizer = FTS5TOK_TRIGRAM;
}else{
iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
}
fossil_free(z);
return iFtsTokenizer;
| > > | 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 |
return iFtsTokenizer;
}
z = db_get("search-tokenizer",0);
if( 0==z ){
iFtsTokenizer = FTS5TOK_NONE;
}else if(0==fossil_strcmp(z,"porter")){
iFtsTokenizer = FTS5TOK_PORTER;
}else if(0==fossil_strcmp(z,"unicode61")){
iFtsTokenizer = FTS5TOK_UNICODE61;
}else if(0==fossil_strcmp(z,"trigram")){
iFtsTokenizer = FTS5TOK_TRIGRAM;
}else{
iFtsTokenizer = is_truth(z) ? FTS5TOK_PORTER : FTS5TOK_NONE;
}
fossil_free(z);
return iFtsTokenizer;
|
| ︙ | ︙ | |||
1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 |
if( 0==z ){
z = zTmp = db_get("search-tokenizer",0);
}
if( 0==z ){
zRc = "off";
}else if( 0==fossil_strcmp(z,"porter") ){
zRc = "porter";
}else if( 0==fossil_strcmp(z,"trigram") ){
zRc = "trigram";
}else{
zRc = is_truth(z) ? "porter" : "off";
}
fossil_free(zTmp);
return zRc;
| > > | 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 |
if( 0==z ){
z = zTmp = db_get("search-tokenizer",0);
}
if( 0==z ){
zRc = "off";
}else if( 0==fossil_strcmp(z,"porter") ){
zRc = "porter";
}else if( 0==fossil_strcmp(z,"unicode61") ){
zRc = "unicode61";
}else if( 0==fossil_strcmp(z,"trigram") ){
zRc = "trigram";
}else{
zRc = is_truth(z) ? "porter" : "off";
}
fossil_free(zTmp);
return zRc;
|
| ︙ | ︙ | |||
1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 |
*/
static int searchIdxExists = -1;
/*
** Pre-change version: picks the extra FTS5 schema text for the
** configured tokenizer, hands zFtsSchema plus that text to
** db_multi_exec(), and then sets searchIdxExists to 1.
*/
void search_create_index(void){
/* FTS5TOK_xyz code obtained from search_tokenizer_type(0). */
const int useTokenizer = search_tokenizer_type(0);
/* Text substituted for the %s in zFtsSchema; assigned on every switch path. */
const char *zExtra;
/* Only porter and trigram add a ",tokenize=..." clause here; every other
** value falls to the default and adds nothing. */
switch(useTokenizer){
case FTS5TOK_PORTER: zExtra = ",tokenize=porter"; break;
case FTS5TOK_TRIGRAM: zExtra = ",tokenize=trigram"; break;
default: zExtra = ""; break;
}
/* NOTE(review): search_sql_setup() is not visible in this view; presumably
** it prepares g.db for the search SQL - confirm against its definition. */
search_sql_setup(g.db);
db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/);
searchIdxExists = 1;
}
| > | 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 |
*/
static int searchIdxExists = -1;
/*
** Post-change version: picks the extra FTS5 schema text for the
** configured tokenizer (now including unicode61), hands zFtsSchema plus
** that text to db_multi_exec(), and then sets searchIdxExists to 1.
*/
void search_create_index(void){
/* FTS5TOK_xyz code obtained from search_tokenizer_type(0). */
const int useTokenizer = search_tokenizer_type(0);
/* Text substituted for the %s in zFtsSchema; assigned on every switch path. */
const char *zExtra;
/* Each recognized tokenizer code maps to a ",tokenize=<name>" clause;
** every other value falls to the default and adds nothing. */
switch(useTokenizer){
case FTS5TOK_PORTER: zExtra = ",tokenize=porter"; break;
case FTS5TOK_UNICODE61: zExtra = ",tokenize=unicode61"; break;
case FTS5TOK_TRIGRAM: zExtra = ",tokenize=trigram"; break;
default: zExtra = ""; break;
}
/* NOTE(review): search_sql_setup() is not visible in this view; presumably
** it prepares g.db for the search SQL - confirm against its definition. */
search_sql_setup(g.db);
db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/);
searchIdxExists = 1;
}
|
| ︙ | ︙ | |||
1979 1980 1981 1982 1983 1984 1985 | ** ** enable cdtwe Enable various kinds of search. c=Check-ins, ** d=Documents, t=Tickets, w=Wiki, e=Tech Notes. ** ** disable cdtwe Disable various kinds of search ** ** tokenizer VALUE Select a tokenizer for indexed search. VALUE | | | | 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 |
**
** enable cdtwe Enable various kinds of search. c=Check-ins,
** d=Documents, t=Tickets, w=Wiki, e=Tech Notes.
**
** disable cdtwe Disable various kinds of search
**
** tokenizer VALUE Select a tokenizer for indexed search. VALUE
** may be one of (porter, on, off, trigram, unicode61),
** and "on" is equivalent to "porter". Unindexed
** search never uses tokenization or stemming.
**
** The current search settings are displayed after any changes are applied.
** Run this command with no arguments to simply see the settings.
*/
void fts_config_cmd(void){
static const struct {
|
| ︙ | ︙ |
Changes to src/setup.c.
| ︙ | ︙ | |||
2014 2015 2016 2017 2018 2019 2020 |
** Renders a selection list of values for the search-tokenizer
** setting, using the form field name "ftstok".
*/
static void select_fts_tokenizer(void){
const char *const aTokenizer[] = {
"off", "None",
"porter", "Porter Stemmer",
| > | | | 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 |
** Renders a selection list of values for the search-tokenizer
** setting, using the form field name "ftstok".
*/
static void select_fts_tokenizer(void){
/* Flat string list read two entries at a time. Each pair looks like a
** search-tokenizer value followed by its display label - TODO confirm
** against multiple_choice_attribute(). */
const char *const aTokenizer[] = {
"off", "None",
"porter", "Porter Stemmer",
"unicode61", "Unicode without stemming",
"trigram", "Trigram",
};
/* NOTE(review): the literal 4 equals the number of pairs in aTokenizer and
** is maintained by hand; presumably it must be bumped whenever a pair is
** added - confirm, and consider deriving it from sizeof(aTokenizer). */
multiple_choice_attribute("FTS Tokenizer", "search-tokenizer",
"ftstok", "off", 4, aTokenizer);
}
/*
** WEBPAGE: srchsetup
**
** Configure the search engine. Requires Admin privilege.
*/
|
| ︙ | ︙ |