Check-in [522104c2cd]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to the delta generator algorthm so that it runs much faster on large files with very few similarities. There is no change to the delta format generated, so this is fully backwards and forwards compatible.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 522104c2cd244485f6a952641c05777864f888f8
User & Date: drh 2009-03-24 22:03:56.000
Context
2009-03-29
22:24
Use "no-store" in place of "private" as the cache-control mode. Ticket [b465b3bc2ceef4446b2ae770242ed0968e4dbc68]. check-in: 5ffc720194 user: drh tags: trunk
2009-03-26
15:32
Incremental changes toward encrypting sync traffic. The changes are incomplete, but all legacy functionality appears to still works. check-in: 5468ec7c5e user: drh tags: experimental
2009-03-24
22:03
Improvements to the delta generator algorthm so that it runs much faster on large files with very few similarities. There is no change to the delta format generated, so this is fully backwards and forwards compatible. check-in: 522104c2cd user: drh tags: trunk
2009-03-22
13:44
Fix a C89 violation in main.c. check-in: d63f87c003 user: drh tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/delta.c.
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
    case 2:   sum += (z[1] << 16);
    case 1:   sum += (z[0] << 24);
    default:  ;
  }
  return sum;
}

/*
** Maximum number of landmarks to set in the source file.
*/
#define MX_LANDMARK (1024*128)

/*
** Create a new delta.
**
** The delta is written into a preallocated buffer, zDelta, which 
** should be at least 60 bytes longer than the target file, zOut.
** The delta string will be NUL-terminated, but it might also contain
** embedded NUL characters if either the zSrc or zOut files are







<
<
<
<
<







219
220
221
222
223
224
225





226
227
228
229
230
231
232
    case 2:   sum += (z[1] << 16);
    case 1:   sum += (z[0] << 24);
    default:  ;
  }
  return sum;
}






/*
** Create a new delta.
**
** The delta is written into a preallocated buffer, zDelta, which 
** should be at least 60 bytes longer than the target file, zOut.
** The delta string will be NUL-terminated, but it might also contain
** embedded NUL characters if either the zSrc or zOut files are
295
296
297
298
299
300
301


302
303
304
305
306
307
308
309
310
311
  const char *zOut,      /* The target file */
  unsigned int lenOut,   /* Length of the target file */
  char *zDelta           /* Write the delta into this buffer */
){
  int i, base;
  char *zOrigDelta = zDelta;
  hash h;


  int *collide;
  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */
  int landmark[MX_LANDMARK];

  /* Add the target file size to the beginning of the delta
  */
  putInt(lenOut, &zDelta);
  *(zDelta++) = '\n';

  /* If the source file is very small, it means that we have no







>
>
|

<







290
291
292
293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
  const char *zOut,      /* The target file */
  unsigned int lenOut,   /* Length of the target file */
  char *zDelta           /* Write the delta into this buffer */
){
  int i, base;
  char *zOrigDelta = zDelta;
  hash h;
  int nHash;                 /* Number of hash table entries */
  int *landmark;             /* Primary hash table */
  int *collide;              /* Collision chain */
  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */


  /* Add the target file size to the beginning of the delta
  */
  putInt(lenOut, &zDelta);
  *(zDelta++) = '\n';

  /* If the source file is very small, it means that we have no
321
322
323
324
325
326
327

328
329

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
    *(zDelta++) = ';';
    return zDelta - zOrigDelta;
  }

  /* Compute the hash table used to locate matching sections in the
  ** source file.
  */

  collide = malloc( lenSrc*sizeof(int)/NHASH );
  if( collide==0 ) return -1;

  memset(landmark, -1, sizeof(landmark));
  memset(collide, -1, lenSrc*sizeof(int)/NHASH );
  for(i=0; i<lenSrc-NHASH; i+=NHASH){
    int hv;
    hash_init(&h, &zSrc[i]);
    hv = hash_32bit(&h) & (MX_LANDMARK-1);
    collide[i/NHASH] = landmark[hv];
    landmark[hv] = i/NHASH;
  }

  /* Begin scanning the target file and generating copy commands and
  ** literal sections of the delta.
  */
  base = 0;    /* We have already generated everything before zOut[base] */
  while( base+NHASH<lenOut ){
    int iSrc, iBlock;
    unsigned int bestCnt, bestOfst=0, bestLitsz=0;
    hash_init(&h, &zOut[base]);
    i = 0;     /* Trying to match a landmark against zOut[base+i] */
    bestCnt = 0;
    while( 1 ){
      int hv;
      int limit = 250;

      hv = hash_32bit(&h) & (MX_LANDMARK-1);
      DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
      iBlock = landmark[hv];
      while( iBlock>=0 && (limit--)>0 ){
        /*
        ** The hash window has identified a potential match against 
        ** landmark block iBlock.  But we need to investigate further.
        ** 







>
|

>
|
|



|


















|







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
    *(zDelta++) = ';';
    return zDelta - zOrigDelta;
  }

  /* Compute the hash table used to locate matching sections in the
  ** source file.
  */
  nHash = lenSrc/NHASH;
  collide = malloc( nHash*2*sizeof(int) );
  if( collide==0 ) return -1;
  landmark = &collide[nHash];
  memset(landmark, -1, nHash*sizeof(int));
  memset(collide, -1, nHash*sizeof(int));
  for(i=0; i<lenSrc-NHASH; i+=NHASH){
    int hv;
    hash_init(&h, &zSrc[i]);
    hv = hash_32bit(&h) % nHash;
    collide[i/NHASH] = landmark[hv];
    landmark[hv] = i/NHASH;
  }

  /* Begin scanning the target file and generating copy commands and
  ** literal sections of the delta.
  */
  base = 0;    /* We have already generated everything before zOut[base] */
  while( base+NHASH<lenOut ){
    int iSrc, iBlock;
    unsigned int bestCnt, bestOfst=0, bestLitsz=0;
    hash_init(&h, &zOut[base]);
    i = 0;     /* Trying to match a landmark against zOut[base+i] */
    bestCnt = 0;
    while( 1 ){
      int hv;
      int limit = 250;

      hv = hash_32bit(&h) % nHash;
      DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
      iBlock = landmark[hv];
      while( iBlock>=0 && (limit--)>0 ){
        /*
        ** The hash window has identified a potential match against 
        ** landmark block iBlock.  But we need to investigate further.
        **