Fossil

Check-in [87f867018b]
Login

Check-in [87f867018b]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance optimization for the alignment calculation on side-by-side diffs. Noticably faster.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 87f867018b25a138b5e4df1d23b3c8e3a4c1a317
User & Date: drh 2012-02-07 18:58:34.115
Context
2012-02-07
20:04
Another minor performance enhancement on sbs diff. ... (check-in: 3e3feb2dda user: drh tags: trunk)
18:58
Performance optimization for the alignment calculation on side-by-side diffs. Noticably faster. ... (check-in: 87f867018b user: drh tags: trunk)
18:13
Optimize the inner loop of the LCS algorithm for the main diff generator. ... (check-in: 4ab6071145 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/diff.c.
531
532
533
534
535
536
537





538
539
540
541







542
543
544
545
546
547
548
549





550
551
552
553
554
555
556
557
558


559
560








561
562
563
564
565

566
567
568
569
570
571





572
573
574
575
576
577
578
    sbsWrite(p, " | ", 3);
    sbsWriteLineno(p, lnRight);
    p->iEnd = nRight - nSuffix;
    sbsWriteText(p, pRight, SBS_NEWLINE);
  }
}






/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
** completely different.







*/
static int match_dline(DLine *pA, DLine *pB){
  const char *zA;
  const char *zB;
  int nA;
  int nB;
  int avg;
  int i, j, k, best, score;






  zA = pA->z;
  zB = pB->z;
  nA = pA->h & LENGTH_MASK;
  nB = pB->h & LENGTH_MASK;
  while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
  while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
  while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
  while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }


  avg = (nA+nB)/2;
  if( avg==0 ) return 0;








  best = 0;
  for(i=0; i<nA-best; i++){
    char c = zA[i];
    for(j=0; j<nB-best; j++){
      if( c!=zB[j] ) continue;

      for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){}
      if( k>best ) best = k;
    }
  }
  score = (best>avg) ? 0 : (avg - best)*100/avg;






  /* Return the result */
  return score;
}

/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right.  This routine computes







>
>
>
>
>




>
>
>
>
>
>
>


|
|
|
|
|
|
>
>
>
>
>









>
>


>
>
>
>
>
>
>
>

|
|
|
<
>
|





>
>
>
>
>







531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591

592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
    sbsWrite(p, " | ", 3);
    sbsWriteLineno(p, lnRight);
    p->iEnd = nRight - nSuffix;
    sbsWriteText(p, pRight, SBS_NEWLINE);
  }
}

/*
** Minimum of two values
*/
static int minInt(int a, int b){ return a<b ? a : b; }

/*
** Return the number between 0 and 100 that is smaller the closer pA and
** pB match.  Return 0 for a perfect match.  Return 100 if pA and pB are
** completely different.
**
** The current algorithm is as follows:
**
** (1) Remove leading and trailing whitespace.
** (2) Truncate both strings to at most 250 characters
** (3) Find the length of the longest common subsequence
** (4) Longer common subsequences yield lower scores.
*/
static int match_dline(DLine *pA, DLine *pB){
  const char *zA;            /* Left string */
  const char *zB;            /* right string */
  int nA;                    /* Bytes in zA[] */
  int nB;                    /* Bytes in zB[] */
  int avg;                   /* Average length of A and B */
  int i, j, k;               /* Loop counters */
  int best = 0;              /* Longest match found so far */
  int score;                 /* Final score.  0..100 */
  unsigned char c;           /* Character being examined */
  unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
  unsigned char aNext[252];  /* aNext[i] = index in zB[] of next zB[i] char */

  zA = pA->z;
  zB = pB->z;
  nA = pA->h & LENGTH_MASK;
  nB = pB->h & LENGTH_MASK;
  while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
  while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
  while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
  while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
  if( nA>250 ) nA = 250;
  if( nB>250 ) nB = 250;
  avg = (nA+nB)/2;
  if( avg==0 ) return 0;
  memset(aFirst, 0, sizeof(aFirst));
  memset(aNext, 0, nB);
  zA--; zB--;   /* Make both zA[] and zB[] 1-indexed */
  for(i=nB; i>0; i--){
    c = (unsigned char)zB[i];
    aNext[i] = aFirst[c];
    aFirst[c] = i;
  }
  best = 0;
  for(i=1; i<=nA; i++){
    c = (unsigned char)zA[i];
    for(j=aFirst[c]; j>0; j = aNext[j]){

      int limit = minInt(nA-i, nB-j)+1;
      for(k=1; k<limit && zA[k+i]==zB[k+j]; k++){}
      if( k>best ) best = k;
    }
  }
  score = (best>avg) ? 0 : (avg - best)*100/avg;

#if 0
  fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
  nA, zA+1, nB, zB+1, best, avg, score);
#endif

  /* Return the result */
  return score;
}

/*
** There is a change block in which nLeft lines of text on the left are
** converted into nRight lines of text on the right.  This routine computes
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
  }
  *piSX = iSXb;
  *piEX = iSXb + mxLength;
  *piSY = iSYb;
  *piEY = iSYb + mxLength;
}

/*
** Minimum of two values
*/
static int minInt(int a, int b){ return a<b ? a : b; }

/*
** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[]
** file and lines iS2 through iE2-1 of the aTo[] file.  Locate a sequence
** of lines in these two blocks that are exactly the same.  Return
** the bounds of the matching sequence.
**
** If there are two or more possible answers of the same length, the







<
<
<
<
<







927
928
929
930
931
932
933





934
935
936
937
938
939
940
  }
  *piSX = iSXb;
  *piEX = iSXb + mxLength;
  *piSY = iSYb;
  *piEY = iSYb + mxLength;
}






/*
** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[]
** file and lines iS2 through iE2-1 of the aTo[] file.  Locate a sequence
** of lines in these two blocks that are exactly the same.  Return
** the bounds of the matching sequence.
**
** If there are two or more possible answers of the same length, the