Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Add the "diff optimizer" which tries to shift inserts and deletes to align with natural boundaries in the text. The resulting diff is no more or less correct than the original; it just seems more natural to human readers. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
98cf5c33bc1529e44ecde9fb5dbc680a |
| User & Date: | drh 2012-02-05 20:22:48.518 |
Context
|
2012-02-06
| ||
| 15:21 | Merge the diff enhancements from the diff-experimental branch into trunk. ... (check-in: bba7aea8ca user: drh tags: trunk) | |
| 01:55 | Trying out a greedy algorithm for aligning the two sides of a change with side-by-side diff. This helps in some cases, but we could probably benefit from a better algorithm. ... (check-in: 881b65141b user: drh tags: diff-experimental) | |
|
2012-02-05
| ||
| 20:22 | Add the "diff optimizer" which tries to shift inserts and deletes to align with natural boundaries in the text. The resulting diff is no more or less correct than the original; it just seems more natural to human readers. ... (check-in: 98cf5c33bc user: drh tags: trunk) | |
| 17:19 | Rearrange code and edit comments in diff logic, for clarity of presentation. No functional changes. ... (check-in: 032da543f0 user: drh tags: trunk) | |
Changes
Changes to src/diff.c.
| ︙ | ︙ | |||
31 32 33 34 35 36 37 38 39 40 41 42 43 44 | #define DIFF_WIDTH_MASK 0x00ff0000 /* side-by-side column width */ #define DIFF_IGNORE_EOLWS 0x01000000 /* Ignore end-of-line whitespace */ #define DIFF_SIDEBYSIDE 0x02000000 /* Generate a side-by-side diff */ #define DIFF_NEWFILE 0x04000000 /* Missing files are as empty files */ #define DIFF_INLINE 0x08000000 /* Inline (not side-by-side) diff */ #define DIFF_HTML 0x10000000 /* Render for HTML */ #define DIFF_LINENO 0x20000000 /* Show line numbers in context diff */ #endif /* INTERFACE */ /* ** Maximum length of a line in a text file. (8192) */ #define LENGTH_MASK_SZ 13 | > > | 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | #define DIFF_WIDTH_MASK 0x00ff0000 /* side-by-side column width */ #define DIFF_IGNORE_EOLWS 0x01000000 /* Ignore end-of-line whitespace */ #define DIFF_SIDEBYSIDE 0x02000000 /* Generate a side-by-side diff */ #define DIFF_NEWFILE 0x04000000 /* Missing files are as empty files */ #define DIFF_INLINE 0x08000000 /* Inline (not side-by-side) diff */ #define DIFF_HTML 0x10000000 /* Render for HTML */ #define DIFF_LINENO 0x20000000 /* Show line numbers in context diff */ #define DIFF_NOOPT 0x40000000 /* Suppress optimizations for debug */ #define DIFF_INVERT 0x80000000 /* Invert the diff for debug */ #endif /* INTERFACE */ /* ** Maximum length of a line in a text file. (8192) */ #define LENGTH_MASK_SZ 13 |
| ︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 68 69 70 71 72 | /* an array of DLine elements services two purposes. The fields ** above are one per line of input text. But each entry is also ** a bucket in a hash table, as follows: */ unsigned int iHash; /* 1+(first entry in the hash chain) */ }; /* ** A context for running a raw diff. ** ** The aEdit[] array describes the raw diff. Each triple of integers in ** aEdit[] means: ** ** (1) COPY: Number of lines aFrom and aTo have in common | > > > > > | 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | /* an array of DLine elements services two purposes. The fields ** above are one per line of input text. But each entry is also ** a bucket in a hash table, as follows: */ unsigned int iHash; /* 1+(first entry in the hash chain) */ }; /* ** Length of a dline */ #define LENGTH(X) ((X)->h & LENGTH_MASK) /* ** A context for running a raw diff. ** ** The aEdit[] array describes the raw diff. Each triple of integers in ** aEdit[] means: ** ** (1) COPY: Number of lines aFrom and aTo have in common |
| ︙ | ︙ | |||
874 875 876 877 878 879 880 881 882 883 884 885 886 887 |
expandEdit(p, p->nEdit+3);
if( p->aEdit ){
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
}
}
/*
** Extract the number of lines of context from diffFlags. Supply an
** appropriate default if no context width is specified.
*/
int diff_context_lines(int diffFlags){
int n = diffFlags & DIFF_CONTEXT_MASK;
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 |
expandEdit(p, p->nEdit+3);
if( p->aEdit ){
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
p->aEdit[p->nEdit++] = 0;
}
}
/*
** Attempt to shift insertion or deletion blocks so that they begin and
** end on lines that are pure whitespace. In other words, try to transform
** this:
**
** int func1(int x){
** return x*10;
** +}
** +
** +int func2(int x){
** + return x*20;
** }
**
** int func3(int x){
** return x/5;
** }
**
** Into one of these:
**
** int func1(int x){ int func1(int x){
** return x*10; return x*10;
** } }
** +
** +int func2(int x){ +int func2(int x){
** + return x*20; + return x*20;
** +} +}
** +
** int func3(int x){ int func3(int x){
** return x/5; return x/5;
** } }
*/
static void diff_optimize(DContext *p){
int r; /* Index of current triple */
int lnFrom; /* Line number in p->aFrom */
int lnTo; /* Line number in p->aTo */
int cpy, del, ins;
lnFrom = lnTo = 0;
for(r=0; r<p->nEdit; r += 3){
cpy = p->aEdit[r];
del = p->aEdit[r+1];
ins = p->aEdit[r+2];
lnFrom += cpy;
lnTo += cpy;
/* Shift insertions toward the beginning of the file */
while( cpy>0 && del==0 && ins>0 ){
DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of insert */
DLine *pBtm = &p->aTo[lnTo+ins-1]; /* Last line inserted */
if( same_dline(pTop, pBtm)==0 ) break;
if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break;
lnFrom--;
lnTo--;
p->aEdit[r]--;
p->aEdit[r+3]++;
cpy--;
}
/* Shift insertions toward the end of the file */
while( p->aEdit[r+3]>0 && del==0 && ins>0 ){
DLine *pTop = &p->aTo[lnTo]; /* First line inserted */
DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */
if( same_dline(pTop, pBtm)==0 ) break;
if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break;
lnFrom++;
lnTo++;
p->aEdit[r]++;
p->aEdit[r+3]--;
cpy++;
}
/* Shift deletions toward the beginning of the file */
while( cpy>0 && del>0 && ins==0 ){
DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of delete */
DLine *pBtm = &p->aFrom[lnFrom+del-1]; /* Last line deleted */
if( same_dline(pTop, pBtm)==0 ) break;
if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break;
lnFrom--;
lnTo--;
p->aEdit[r]--;
p->aEdit[r+3]++;
cpy--;
}
/* Shift deletions toward the end of the file */
while( p->aEdit[r+3]>0 && del>0 && ins==0 ){
DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */
DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */
if( same_dline(pTop, pBtm)==0 ) break;
if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break;
lnFrom++;
lnTo++;
p->aEdit[r]++;
p->aEdit[r+3]--;
cpy++;
}
lnFrom += del;
lnTo += ins;
}
}
/*
** Extract the number of lines of context from diffFlags. Supply an
** appropriate default if no context width is specified.
*/
int diff_context_lines(int diffFlags){
int n = diffFlags & DIFF_CONTEXT_MASK;
|
| ︙ | ︙ | |||
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 |
Blob *pOut, /* Write diff here if not NULL */
int diffFlags /* DIFF_* flags defined above */
){
int ignoreEolWs; /* Ignore whitespace at the end of lines */
int nContext; /* Amount of context to display */
DContext c;
nContext = diff_context_lines(diffFlags);
ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
/* Prepare the input files */
memset(&c, 0, sizeof(c));
c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
&c.nFrom, ignoreEolWs);
c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
&c.nTo, ignoreEolWs);
if( c.aFrom==0 || c.aTo==0 ){
free(c.aFrom);
free(c.aTo);
if( pOut ){
blob_appendf(pOut, "cannot compute difference between binary files\n");
}
return 0;
}
/* Compute the difference */
diff_all(&c);
if( pOut ){
/* Compute a context or side-by-side diff into pOut */
int escHtml = (diffFlags & DIFF_HTML)!=0;
if( diffFlags & DIFF_SIDEBYSIDE ){
int width = diff_width(diffFlags);
sbsDiff(&c, pOut, nContext, width, escHtml);
| > > > > > > | 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 |
Blob *pOut, /* Write diff here if not NULL */
int diffFlags /* DIFF_* flags defined above */
){
int ignoreEolWs; /* Ignore whitespace at the end of lines */
int nContext; /* Amount of context to display */
DContext c;
if( diffFlags & DIFF_INVERT ){
Blob *pTemp = pA_Blob;
pA_Blob = pB_Blob;
pB_Blob = pTemp;
}
nContext = diff_context_lines(diffFlags);
ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
/* Prepare the input files */
memset(&c, 0, sizeof(c));
c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
&c.nFrom, ignoreEolWs);
c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
&c.nTo, ignoreEolWs);
if( c.aFrom==0 || c.aTo==0 ){
free(c.aFrom);
free(c.aTo);
if( pOut ){
blob_appendf(pOut, "cannot compute difference between binary files\n");
}
return 0;
}
/* Compute the difference */
diff_all(&c);
if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
if( pOut ){
/* Compute a context or side-by-side diff into pOut */
int escHtml = (diffFlags & DIFF_HTML)!=0;
if( diffFlags & DIFF_SIDEBYSIDE ){
int width = diff_width(diffFlags);
sbsDiff(&c, pOut, nContext, width, escHtml);
|
| ︙ | ︙ | |||
964 965 966 967 968 969 970 |
*/
free(c.aFrom);
free(c.aTo);
return c.aEdit;
}
}
| < < < < < < < < < < < < < < < < < < < < < < | 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 |
*/
free(c.aFrom);
free(c.aTo);
return c.aEdit;
}
}
/*
** Process diff-related command-line options and return an appropriate
** "diffFlags" integer.
**
** --side-by-side|-y Side-by-side diff. DIFF_SIDEBYSIDE
** --context|-c N N lines of context. DIFF_CONTEXT_MASK
** --width|-W N N character lines. DIFF_WIDTH_MASK
|
| ︙ | ︙ | |||
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 |
if( (z = find_option("width","W",1))!=0 && (f = atoi(z))>0 ){
f *= DIFF_CONTEXT_MASK+1;
if( f > DIFF_WIDTH_MASK ) f = DIFF_CONTEXT_MASK;
diffFlags |= f;
}
if( find_option("html",0,0)!=0 ) diffFlags |= DIFF_HTML;
if( find_option("linenum","n",0)!=0 ) diffFlags |= DIFF_LINENO;
return diffFlags;
}
/*
** COMMAND: test-udiff
**
** Print the difference between two files. The usual diff options apply.
*/
void test_udiff_cmd(void){
| > > > > > > > > > > > > > > > > > > > > > > > > > | 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 |
if( (z = find_option("width","W",1))!=0 && (f = atoi(z))>0 ){
f *= DIFF_CONTEXT_MASK+1;
if( f > DIFF_WIDTH_MASK ) f = DIFF_CONTEXT_MASK;
diffFlags |= f;
}
if( find_option("html",0,0)!=0 ) diffFlags |= DIFF_HTML;
if( find_option("linenum","n",0)!=0 ) diffFlags |= DIFF_LINENO;
if( find_option("noopt",0,0)!=0 ) diffFlags |= DIFF_NOOPT;
if( find_option("invert",0,0)!=0 ) diffFlags |= DIFF_INVERT;
return diffFlags;
}
/*
** COMMAND: test-rawdiff
*/
void test_rawdiff_cmd(void){
Blob a, b;
int r;
int i;
int *R;
int diffFlags = diff_options();
if( g.argc<4 ) usage("FILE1 FILE2 ...");
blob_read_from_file(&a, g.argv[2]);
for(i=3; i<g.argc; i++){
if( i>3 ) fossil_print("-------------------------------\n");
blob_read_from_file(&b, g.argv[i]);
R = text_diff(&a, &b, 0, diffFlags);
for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
}
/* free(R); */
blob_reset(&b);
}
}
/*
** COMMAND: test-udiff
**
** Print the difference between two files. The usual diff options apply.
*/
void test_udiff_cmd(void){
|
| ︙ | ︙ |
Changes to src/info.c.
| ︙ | ︙ | |||
371 372 373 374 375 376 377 378 379 380 381 382 383 384 |
diffFlags = DIFF_INLINE | DIFF_IGNORE_EOLWS;
}
/* "dc" query parameter determines lines of context */
x = atoi(PD("dc","7"));
if( x<0 || x>DIFF_CONTEXT_MASK ) x = DIFF_CONTEXT_MASK;
diffFlags += x;
}
return diffFlags;
}
/*
** WEBPAGE: vinfo
| > > > | 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 |
diffFlags = DIFF_INLINE | DIFF_IGNORE_EOLWS;
}
/* "dc" query parameter determines lines of context */
x = atoi(PD("dc","7"));
if( x<0 || x>DIFF_CONTEXT_MASK ) x = DIFF_CONTEXT_MASK;
diffFlags += x;
/* The "noopt" parameter disables diff optimization */
if( PD("noopt",0)!=0 ) diffFlags |= DIFF_NOOPT;
}
return diffFlags;
}
/*
** WEBPAGE: vinfo
|
| ︙ | ︙ |