Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Initial work on the search_stext() function used to extract searchable text from formatted files. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
48e1e18304a2ec18173e4985977a28f7 |
| User & Date: | drh 2015-01-31 22:13:39.480 |
Context
|
2015-02-01
| ||
| 00:15 | The /search page now covers wiki and check-in comments. And the formatting of snippets is improved. The search is still done by full-scan but the infrastructure is coming into place to handle the search using an index. ... (check-in: 8e02c26ad2 user: drh tags: trunk) | |
|
2015-01-31
| ||
| 22:13 | Initial work on the search_stext() function used to extract searchable text from formatted files. ... (check-in: 48e1e18304 user: drh tags: trunk) | |
| 19:58 | Add a routine that attempts to strip all markup off of HTML text. The intended use is in the search logic. ... (check-in: cbd8e67f73 user: drh tags: trunk) | |
Changes
Changes to src/doc.c.
| ︙ | ︙ | |||
348 349 350 351 352 353 354 355 356 357 358 359 360 361 |
*/
void mimetype_test_cmd(void){
  int iArg = 2;   /* Index into g.argv[]; entries 0 and 1 are skipped */

  /* Print one "NAME -> MIMETYPE" line for every remaining argument */
  while( iArg<g.argc ){
    const char *zFile = g.argv[iArg];
    fossil_print("%-20s -> %s\n", zFile, mimetype_from_name(zFile));
    iArg++;
  }
}
/*
** WEBPAGE: doc
** URL: /doc?name=CHECKIN/FILE
** URL: /doc/CHECKIN/FILE
**
** CHECKIN can be either tag or SHA1 hash or timestamp identifying a
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 |
*/
void mimetype_test_cmd(void){
  int iArg = 2;   /* Index into g.argv[]; entries 0 and 1 are skipped */

  /* Print one "NAME -> MIMETYPE" line for every remaining argument */
  while( iArg<g.argc ){
    const char *zFile = g.argv[iArg];
    fossil_print("%-20s -> %s\n", zFile, mimetype_from_name(zFile));
    iArg++;
  }
}
/*
** Fetch the file named zName as it exists in the check-in whose RID
** is vid.
**
** On success the file content is stored in pContent and the RID of the
** file artifact is returned.  Zero is returned when the vcache table has
** no entry for (vid,zName) or when content_get() reports failure.
**
** The filename-to-RID mapping comes from the vcache table, which is
** created on demand.  Whenever vcache holds no rows for vid it is
** emptied and then refilled from the files_of_checkin virtual table,
** so it describes at most one check-in at a time.
*/
int doc_load_content(int vid, const char *zName, Blob *pContent){
  int fileRid;          /* RID of the artifact that holds zName */

  /* Create the cache table the first time it is needed */
  if( db_table_exists("repository","vcache")==0 ){
    db_multi_exec(
      "CREATE TABLE IF NOT EXISTS vcache(\n"
      " vid INTEGER, -- checkin ID\n"
      " fname TEXT, -- filename\n"
      " rid INTEGER, -- artifact ID\n"
      " PRIMARY KEY(vid,fname)\n"
      ") WITHOUT ROWID"
    );
  }

  /* Reload the cache if it does not currently describe check-in vid */
  if( db_exists("SELECT 1 FROM vcache WHERE vid=%d", vid)==0 ){
    db_multi_exec(
      "DELETE FROM vcache;\n"
      "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;\n"
      "INSERT INTO vcache(vid,fname,rid)"
      " SELECT checkinID, filename, blob.rid FROM foci, blob"
      " WHERE blob.uuid=foci.uuid"
      " AND foci.checkinID=%d;",
      vid
    );
  }

  /* Look up the file, then try to load its content */
  fileRid = db_int(0, "SELECT rid FROM vcache"
                      " WHERE vid=%d AND fname=%Q", vid, zName);
  if( fileRid==0 ) return 0;
  if( content_get(fileRid, pContent)==0 ) return 0;
  return fileRid;
}
/*
** WEBPAGE: doc
** URL: /doc?name=CHECKIN/FILE
** URL: /doc/CHECKIN/FILE
**
** CHECKIN can be either tag or SHA1 hash or timestamp identifying a
|
| ︙ | ︙ | |||
395 396 397 398 399 400 401 402 403 404 405 406 407 408 |
int nMiss = (-1); /* Failed attempts to find the document */
static const char *const azSuffix[] = {
"index.html", "index.wiki", "index.md"
};
login_check_credentials();
if( !g.perm.Read ){ login_needed(); return; }
while( rid==0 && (++nMiss)<=ArraySize(azSuffix) ){
zName = PD("name", "tip/index.wiki");
for(i=0; zName[i] && zName[i]!='/'; i++){}
zCheckin = mprintf("%.*s", i, zName);
if( fossil_strcmp(zCheckin,"ckout")==0 && db_open_local(0)==0 ){
zCheckin = "tip";
}
| > | 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 |
int nMiss = (-1); /* Failed attempts to find the document */
static const char *const azSuffix[] = {
"index.html", "index.wiki", "index.md"
};
login_check_credentials();
if( !g.perm.Read ){ login_needed(); return; }
db_begin_transaction();
while( rid==0 && (++nMiss)<=ArraySize(azSuffix) ){
zName = PD("name", "tip/index.wiki");
for(i=0; zName[i] && zName[i]!='/'; i++){}
zCheckin = mprintf("%.*s", i, zName);
if( fossil_strcmp(zCheckin,"ckout")==0 && db_open_local(0)==0 ){
zCheckin = "tip";
}
|
| ︙ | ︙ | |||
435 436 437 438 439 440 441 |
zFullpath = mprintf("%s/%s", g.zLocalRoot, zName);
if( file_isfile(zFullpath)
&& blob_read_from_file(&filebody, zFullpath)>0 ){
rid = 1; /* Fake RID just to get the loop to end */
}
fossil_free(zFullpath);
}else{
| < < < < < < < < < < < < < < < < < < < < < | < < < < | 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 |
zFullpath = mprintf("%s/%s", g.zLocalRoot, zName);
if( file_isfile(zFullpath)
&& blob_read_from_file(&filebody, zFullpath)>0 ){
rid = 1; /* Fake RID just to get the loop to end */
}
fossil_free(zFullpath);
}else{
vid = name_to_typed_rid(zCheckin, "ci");
rid = doc_load_content(vid, zName, &filebody);
}
}
if( rid==0 ) goto doc_not_found;
blob_to_utf8_no_bom(&filebody, 0);
/* The file is now contained in the filebody blob. Deliver the
** file to the user
|
| ︙ | ︙ | |||
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 |
style_footer();
#endif
}else{
cgi_set_content_type(zMime);
cgi_set_content(&filebody);
}
if( nMiss>=ArraySize(azSuffix) ) cgi_set_status(404, "Not Found");
return;
/* Jump here when unable to locate the document */
doc_not_found:
db_end_transaction(0);
cgi_set_status(404, "Not Found");
style_header("Not Found");
@ <p>Document %h(zOrigName) not found
if( fossil_strcmp(zCheckin,"ckout")!=0 ){
@ in %z(href("%R/tree?ci=%T",zCheckin))%h(zCheckin)</a>
}
style_footer();
return;
}
/*
** The default logo.
*/
static const unsigned char aLogo[] = {
| > > | 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 |
style_footer();
#endif
}else{
cgi_set_content_type(zMime);
cgi_set_content(&filebody);
}
if( nMiss>=ArraySize(azSuffix) ) cgi_set_status(404, "Not Found");
db_end_transaction(0);
return;
/* Jump here when unable to locate the document */
doc_not_found:
db_end_transaction(0);
cgi_set_status(404, "Not Found");
style_header("Not Found");
@ <p>Document %h(zOrigName) not found
if( fossil_strcmp(zCheckin,"ckout")!=0 ){
@ in %z(href("%R/tree?ci=%T",zCheckin))%h(zCheckin)</a>
}
style_footer();
db_end_transaction(0);
return;
}
/*
** The default logo.
*/
static const unsigned char aLogo[] = {
|
| ︙ | ︙ |
Changes to src/search.c.
| ︙ | ︙ | |||
561 562 563 564 565 566 567 |
@ <li><p>%s(href("%s",zUrl))%h(zUrl)</a><br>%s(zSnippet)</li>
}
db_finalize(&q);
@ </ol>
}
style_footer();
}
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 |
@ <li><p>%s(href("%s",zUrl))%h(zUrl)</a><br>%s(zSnippet)</li>
}
db_finalize(&q);
@ </ol>
}
style_footer();
}
/*
** This is a helper function for search_stext().  Write into pOut the
** search text obtained from pIn according to zMimetype.
**
**   pIn         The document content.
**   zMimetype   Mimetype of pIn.  NULL is treated as "text/plain".
**   pOut        Receives the search text.  Must already be initialized
**               by the caller.
**
** Fossil-wiki and markdown input is first rendered to HTML and the HTML
** is then passed through html_to_plaintext().  "text/html" input goes
** through html_to_plaintext() directly.  Any other mimetype is copied
** to pOut unchanged.
**
** pIn remains owned by the caller in every case; the caller is still
** responsible for resetting it.
*/
static void get_stext_by_mimetype(
  Blob *pIn,
  const char *zMimetype,
  Blob *pOut
){
  Blob html, title;
  blob_init(&html, 0, 0);
  blob_init(&title, 0, 0);
  if( zMimetype==0 ) zMimetype = "text/plain";
  if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){
    wiki_convert(pIn, &html, 0);
    html_to_plaintext(blob_str(&html), pOut);
  }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){
    markdown_to_html(pIn, &title, &html);
    html_to_plaintext(blob_str(&html), pOut);
  }else if( fossil_strcmp(zMimetype,"text/html")==0 ){
    html_to_plaintext(blob_str(pIn), pOut);
  }else{
    /* Copy the bytes instead of handing pIn's buffer over to pOut.
    ** pIn can be a blob that merely wraps memory owned by something
    ** else: search_stext() wraps pWiki->zWiki and then calls
    ** manifest_destroy(pWiki).  Aliasing that buffer from pOut would
    ** leave pOut pointing at released memory. */
    blob_append(pOut, blob_buffer(pIn), blob_size(pIn));
  }
  blob_reset(&html);
  blob_reset(&title);
}
/*
** Compute the "search text" for a document: a reduced form of the
** document suitable for full text search and/or for building a search
** result snippet.  The result is written into pOut, which this routine
** initializes.
**
** cType selects the kind of document:
**
**      d      Embedded documentation
**      s      Source code listing
**      w      Wiki page
**      c      Check-in comment
**      t      Ticket text
**      e      Event/Blog text
**      k      Diff of a wiki
**      f      Diff of a checkin
**
** Only 'd', 's' and 'w' are handled by the code below.  For any other
** cType, pOut is left initialized and empty.
**
** zArg1, zArg2: Description of the document, depending on cType.
*/
void search_stext(
  char cType,            /* Type of document */
  const char *zArg1,     /* First parameter */
  const char *zArg2,     /* Second parameter */
  Blob *pOut             /* OUT: Initialize to the search text */
){
  blob_init(pOut, 0, 0);
  if( cType=='d' || cType=='s' ){
    /* Doc or Source.  zArg1: RID of the file.  zArg2: Filename */
    Blob content;
    content_get(atoi(zArg1), &content);
    blob_to_utf8_no_bom(&content, 0);
    get_stext_by_mimetype(&content, mimetype_from_name(zArg2), pOut);
    blob_reset(&content);
  }else if( cType=='w' ){
    /* Wiki.  zArg1: RID of the page.  zArg2: Page name */
    Blob text;
    Manifest *pPage = manifest_get(atoi(zArg1), CFTYPE_WIKI,0);
    if( pPage ){
      blob_init(&text, pPage->zWiki, -1);
      get_stext_by_mimetype(&text, wiki_filter_mimetypes(pPage->zMimetype),
                            pOut);
      blob_reset(&text);
      manifest_destroy(pPage);
    }
  }
}
/*
** COMMAND: test-search-stext
**
** Usage: fossil test-search-stext TYPE ARG1 ARG2
*/
void test_search_stext(void){
  Blob stext;     /* Search text produced by search_stext() */

  db_find_and_open_repository(0,0);
  if( g.argc!=5 ){
    usage("TYPE ARG1 ARG2");
  }
  /* Only the first character of the TYPE argument is passed along */
  search_stext(g.argv[2][0], g.argv[3], g.argv[4], &stext);
  fossil_print("%s",blob_str(&stext));
  blob_reset(&stext);
}
|