Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Performance optimization for the alignment calculation on side-by-side diffs. Noticably faster. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
87f867018b25a138b5e4df1d23b3c8e3 |
| User & Date: | drh 2012-02-07 18:58:34.115 |
Context
|
2012-02-07
| ||
| 20:04 | Another minor performance enhancement on sbs diff. ... (check-in: 3e3feb2dda user: drh tags: trunk) | |
| 18:58 | Performance optimization for the alignment calculation on side-by-side diffs. Noticably faster. ... (check-in: 87f867018b user: drh tags: trunk) | |
| 18:13 | Optimize the inner loop of the LCS algorithm for the main diff generator. ... (check-in: 4ab6071145 user: drh tags: trunk) | |
Changes
Changes to src/diff.c.
| ︙ | ︙ | |||
531 532 533 534 535 536 537 538 539 540 541 542 543 |
sbsWrite(p, " | ", 3);
sbsWriteLineno(p, lnRight);
p->iEnd = nRight - nSuffix;
sbsWriteText(p, pRight, SBS_NEWLINE);
}
}
/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
** completely different.
*/
static int match_dline(DLine *pA, DLine *pB){
| > > > > > > > > > > > > | | | | | | > > > > > > > > > > > > > > > | | | < > | > > > > > | 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 |
sbsWrite(p, " | ", 3);
sbsWriteLineno(p, lnRight);
p->iEnd = nRight - nSuffix;
sbsWriteText(p, pRight, SBS_NEWLINE);
}
}
/*
** Minimum of two values
*/
static int minInt(int a, int b){ return a<b ? a : b; }
/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
** completely different.
**
** The current algorithm is as follows:
**
** (1) Remove leading and trailing whitespace.
** (2) Truncate both strings to at most 250 characters
** (3) Find the length of the longest common subsequence
** (4) Longer common subsequences yield lower scores.
*/
static int match_dline(DLine *pA, DLine *pB){
const char *zA; /* Left string */
const char *zB; /* right string */
int nA; /* Bytes in zA[] */
int nB; /* Bytes in zB[] */
int avg; /* Average length of A and B */
int i, j, k; /* Loop counters */
int best = 0; /* Longest match found so far */
int score; /* Final score. 0..100 */
unsigned char c; /* Character being examined */
unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */
zA = pA->z;
zB = pB->z;
nA = pA->h & LENGTH_MASK;
nB = pB->h & LENGTH_MASK;
while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
if( nA>250 ) nA = 250;
if( nB>250 ) nB = 250;
avg = (nA+nB)/2;
if( avg==0 ) return 0;
memset(aFirst, 0, sizeof(aFirst));
memset(aNext, 0, nB);
zA--; zB--; /* Make both zA[] and zB[] 1-indexed */
for(i=nB; i>0; i--){
c = (unsigned char)zB[i];
aNext[i] = aFirst[c];
aFirst[c] = i;
}
best = 0;
for(i=1; i<=nA; i++){
c = (unsigned char)zA[i];
for(j=aFirst[c]; j>0; j = aNext[j]){
int limit = minInt(nA-i, nB-j)+1;
for(k=1; k<limit && zA[k+i]==zB[k+j]; k++){}
if( k>best ) best = k;
}
}
score = (best>avg) ? 0 : (avg - best)*100/avg;
#if 0
fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
nA, zA+1, nB, zB+1, best, avg, score);
#endif
/* Return the result */
return score;
}
/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right. This routine computes
|
| ︙ | ︙ | |||
895 896 897 898 899 900 901 | } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } | < < < < < | 927 928 929 930 931 932 933 934 935 936 937 938 939 940 | } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } /* ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence ** of lines in these two blocks that are exactly the same. Return ** the bounds of the matching sequence. ** ** If there are two or more possible answers of the same length, the |
| ︙ | ︙ |