Fossil

Check-in [469462b69a]
Login

Check-in [469462b69a]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Convert the similarity measure for side-by-side diff alignment to use LCS instead of edit distance. LCS is faster and gives comparable results.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 469462b69a560074cac7701f0d46cc30086e698e
User & Date: drh 2012-02-07 03:57:39.925
Context
2012-02-07
04:15
Add chunk number fragment marks to HTML diff output. ... (check-in: b1530c29ab user: drh tags: trunk)
03:57
Convert the similarity measure for side-by-side diff alignment to use LCS instead of edit distance. LCS is faster and gives comparable results. ... (check-in: 469462b69a user: drh tags: trunk)
00:23
Allow file: clones that transfer private branches. ... (check-in: 8f85286cff user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/diff.c.
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614

/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
** completely different.
*/
static int match_dline(DLine *pA, DLine *pB){
  int *pToFree;
  int *a;
  const char *zA;
  const char *zB;
  int nA;
  int nB;
  int avg;
  int i, j, dist, score;
  int aStatic[200];

  zA = pA->z;
  zB = pB->z;
  nA = pA->h & LENGTH_MASK;
  nB = pB->h & LENGTH_MASK;
  while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
  while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
  while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
  while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
  avg = (nA+nB)/2;
  if( avg==0 ) return 100;
  dist = 0;

  /* Remove any common prefix and suffix */
  while( nA && nB && zA[0]==zB[0] ){
    nA--;
    nB--;
    zA++;
    zB++;
    dist++;
  }
  while( nA && nB && zA[nA-1]==zB[nB-1] ){
    nA--;
    nB--;
    dist++;
  }

  if( nA>0 && nB>0 ){
    /* Allocate space of the dynamic programming array */  
    if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){
      pToFree = 0;
      a = aStatic;
    }else{
      pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) );
    }

    /* Compute the length of the best sequence of matching characters */
    for(i=0; i<=nB; i++) a[i] = 0;
    for(j=0; j<nA; j++){
      int p = 0;
      for(i=0; i<nB; i++){
        int m = a[i];
        if( m<a[i+1] ) m = a[i+1];
        if( m<p+1 && zA[j]==zB[i] ) m = p+1;
        p = a[i+1];
        a[i+1] = m;
      }
    }
    dist += a[nB];
    fossil_free(pToFree);
  }
  score = dist>avg ? 0 : (avg - dist)*100/avg;

  /* Return the result */
  return score;
}



/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right.  This routine computes
** how the lines on the left line up with the lines on the right.
**
** The return value is a buffer of unsigned characters, obtained from







<
<





|
<











|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
|
|
<
|
|
<
<
<
|




<
<







534
535
536
537
538
539
540


541
542
543
544
545
546

547
548
549
550
551
552
553
554
555
556
557
558
559













560











561



562
563
564

565
566



567
568
569
570
571


572
573
574
575
576
577
578

/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
** completely different.
*/
static int match_dline(DLine *pA, DLine *pB){


  const char *zA;
  const char *zB;
  int nA;
  int nB;
  int avg;
  int i, j, k, best, score;


  zA = pA->z;
  zB = pB->z;
  nA = pA->h & LENGTH_MASK;
  nB = pB->h & LENGTH_MASK;
  while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
  while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
  while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
  while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
  avg = (nA+nB)/2;
  if( avg==0 ) return 100;
  best = 0;
  for(i=0; i<nA-best; i++){













    char c = zA[i];











    for(j=0; j<nB-best; j++){



      if( c!=zB[j] ) continue;
      for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){}
      if( k>best ) best = k;

    }
  }



  score = (best>avg) ? 0 : (avg - best)*100/avg;

  /* Return the result */
  return score;
}



/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right.  This routine computes
** how the lines on the left line up with the lines on the right.
**
** The return value is a buffer of unsigned characters, obtained from