Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Tweaks to the diff algorithm give a 4x performance increase. Now comparable to command-line diff. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
e8cf0061cc47c87521f9401c42c52fd4 |
| User & Date: | drh 2008-02-04 13:53:55.000 |
Context
|
2008-02-04
| ||
| 14:05 | Improvements to comments on the diff algorithm code. Completely remove the older Wagner/Myers algorithm which had been commented out. ... (check-in: eeea77f340 user: drh tags: trunk) | |
| 13:53 | Tweaks to the diff algorithm give a 4x performance increase. Now comparable to command-line diff. ... (check-in: e8cf0061cc user: drh tags: trunk) | |
| 13:14 | Better error message when attempting to create a new repository in a directory that does not exist. ... (check-in: 97ff24dec7 user: drh tags: trunk) | |
Changes
Changes to src/diff.c.
| ︙ | ︙ | |||
303 304 305 306 307 308 309 |
static void longestCommonSequence(
DContext *p,
int iS1, int iE1,
int iS2, int iE2,
int *piSX, int *piEX,
int *piSY, int *piEY
){
| | | | | > > > > > | | | | > > > | | | | > > > > > > > > > | 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
static void longestCommonSequence(
DContext *p,
int iS1, int iE1,
int iS2, int iE2,
int *piSX, int *piEX,
int *piSY, int *piEY
){
double bestScore = -1e30; /* Best score seen so far */
int i, j; /* Loop counters */
int iSX, iSY, iEX, iEY; /* Current match */
double score; /* Current score */
int skew; /* How lopsided is the match */
int dist; /* Distance of match from center */
int mid; /* Center of the span */
int iSXb, iSYb, iEXb, iEYb; /* Best match so far */
int iSXp, iSYp, iEXp, iEYp; /* Previous match */
iSXb = iSXp = iS1;
iEXb = iEXp = iS1;
iSYb = iSYp = iS2;
iEYb = iEYp = iS2;
mid = (iE1 + iS1)/2;
for(i=iS1; i<iE1; i++){
int limit = 0;
j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
while( j>0
&& (j-1<iS2 || j>=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1]))
){
if( limit++ > 10 ){
j = 0;
break;
}
j = p->aTo[j-1].iNext;
}
if( j==0 ) continue;
assert( i>=iSXb && i>=iSXp );
if( i<iEXb && j>=iSYb && j<iEYb ) continue;
if( i<iEXp && j>=iSYp && j<iEYp ) continue;
iSX = i;
iSY = j-1;
while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
iSX--;
iSY--;
}
iEX = i+1;
iEY = j;
while( iEX<iE1 && iEY<iE2 && same_dline(&p->aFrom[iEX],&p->aTo[iEY]) ){
iEX++;
iEY++;
}
skew = (iSX-iS1) - (iSY-iS2);
if( skew<0 ) skew = -skew;
dist = (iSX+iEX)/2 - mid;
if( dist<0 ) dist = -dist;
score = (iEX - iSX) - 0.05*skew - 0.05*dist;
if( score>bestScore ){
bestScore = score;
iSXb = iSX;
iSYb = iSY;
iEXb = iEX;
iEYb = iEY;
}else{
iSXp = iSX;
iSYp = iSY;
iEXp = iEX;
iEYp = iEY;
}
}
*piSX = iSXb;
*piSY = iSYb;
*piEX = iEXb;
*piEY = iEYb;
/* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */
}
/*
** Do a single step in the difference. Compute a sequence of
** copy/delete/insert steps that will convert lines iS1 through iE1-1 of
|
| ︙ | ︙ | |||
389 390 391 392 393 394 395 396 397 398 399 400 401 402 |
appendTriple(p, iEX - iSX, 0, 0);
}
diff_step(p, iEX, iE1, iEY, iE2);
}else{
appendTriple(p, 0, iE1-iS1, iE2-iS2);
}
}
/*
** Generate a report of the differences between files pA and pB.
** If pOut is not NULL then a unified diff is appended there. It
** is assumed that pOut has already been initialized. If pOut is
** NULL, then a pointer to an array of integers is returned.
** The integers come in triples. For each triple,
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
appendTriple(p, iEX - iSX, 0, 0);
}
diff_step(p, iEX, iE1, iEY, iE2);
}else{
appendTriple(p, 0, iE1-iS1, iE2-iS2);
}
}
/*
** Compute the differences between two files already loaded into
** the DContext structure.
*/
static void diff_all(DContext *p){
int mnE, iS, iE1, iE2;
/* Carve off the common header and footer */
iE1 = p->nFrom;
iE2 = p->nTo;
while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
iE1--;
iE2--;
}
mnE = iE1<iE2 ? iE1 : iE2;
for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
/* do the difference */
if( iS>0 ){
appendTriple(p, iS, 0, 0);
}
diff_step(p, iS, iE1, iS, iE2);
if( iE1<p->nFrom ){
appendTriple(p, p->nFrom - iE1, 0, 0);
}
/* Terminate the COPY/DELETE/INSERT triples with three zeros */
expandEdit(p, p->nEdit+3);
if( p->aEdit ){
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
}
}
/*
** Generate a report of the differences between files pA and pB.
** If pOut is not NULL then a unified diff is appended there. It
** is assumed that pOut has already been initialized. If pOut is
** NULL, then a pointer to an array of integers is returned.
** The integers come in triples. For each triple,
|
| ︙ | ︙ | |||
411 412 413 414 415 416 417 |
int *text_diff(
Blob *pA_Blob, /* FROM file */
Blob *pB_Blob, /* TO file */
Blob *pOut, /* Write unified diff here if not NULL */
int nContext /* Amount of context to unified diff */
){
DContext c;
| < | > < < < < < < < < < < | < < < < < < < | < < < < < < | < | 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 |
int *text_diff(
Blob *pA_Blob, /* FROM file */
Blob *pB_Blob, /* TO file */
Blob *pOut, /* Write unified diff here if not NULL */
int nContext /* Amount of context to unified diff */
){
DContext c;
/* Prepare the input files */
memset(&c, 0, sizeof(c));
c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom);
c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo);
if( c.aFrom==0 || c.aTo==0 ){
free(c.aFrom);
free(c.aTo);
if( pOut ){
blob_appendf(pOut, "cannot compute difference between binary files\n");
}
return 0;
}
/* Compute the difference */
diff_all(&c);
if( pOut ){
/* Compute a context diff if requested */
contextDiff(&c, pOut, nContext);
free(c.aFrom);
free(c.aTo);
free(c.aEdit);
return 0;
}else{
/* If a context diff is not requested, then return the
** array of COPY/DELETE/INSERT triples.
*/
free(c.aFrom);
free(c.aTo);
return c.aEdit;
}
}
|
| ︙ | ︙ |